| 1 | /* IPA function body analysis. |
| 2 | Copyright (C) 2003-2025 Free Software Foundation, Inc. |
| 3 | Contributed by Jan Hubicka |
| 4 | |
| 5 | This file is part of GCC. |
| 6 | |
| 7 | GCC is free software; you can redistribute it and/or modify it under |
| 8 | the terms of the GNU General Public License as published by the Free |
| 9 | Software Foundation; either version 3, or (at your option) any later |
| 10 | version. |
| 11 | |
| 12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| 13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 15 | for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with GCC; see the file COPYING3. If not see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | #ifndef GCC_IPA_SUMMARY_H |
| 22 | #define GCC_IPA_SUMMARY_H |
| 23 | |
| 24 | #include "sreal.h" |
| 25 | #include "ipa-predicate.h" |
| 26 | |
| 27 | |
| 28 | /* Hints are reasons why IPA heuristics should prefer specializing given |
| 29 | function. They are represented as bitmap of the following values. */ |
| 30 | enum ipa_hints_vals { |
| 31 | /* When specialization turns indirect call into a direct call, |
| 32 | it is good idea to do so. */ |
| 33 | INLINE_HINT_indirect_call = 1, |
| 34 | /* Inlining may make loop iterations or loop stride known. It is good idea |
| 35 | to do so because it enables loop optimizations. */ |
| 36 | INLINE_HINT_loop_iterations = 2, |
| 37 | INLINE_HINT_loop_stride = 4, |
| 38 | /* Inlining within same strongly connected component of callgraph is often |
| 39 | a loss due to increased stack frame usage and prologue setup costs. */ |
| 40 | INLINE_HINT_same_scc = 8, |
| 41 | /* Inlining functions in strongly connected component is not such a great |
| 42 | win. */ |
| 43 | INLINE_HINT_in_scc = 16, |
| 44 | /* If function is declared inline by user, it may be good idea to inline |
| 45 | it. Set by simple_edge_hints in ipa-inline-analysis.cc. */ |
| 46 | INLINE_HINT_declared_inline = 32, |
| 47 | /* Programs are usually still organized for non-LTO compilation and thus |
| 48 | if functions are in different modules, inlining may not be so important. |
| 49 | Set by simple_edge_hints in ipa-inline-analysis.cc. */ |
| 50 | INLINE_HINT_cross_module = 64, |
| 51 | /* We know that the callee is hot by profile. */ |
| 52 | INLINE_HINT_known_hot = 128, |
| 53 | /* There is builtin_constant_p dependent on parameter which is usually |
| 54 | a strong hint to inline. */ |
| 55 | INLINE_HINT_builtin_constant_p = 256 |
| 56 | }; |
| 57 | |
| 58 | typedef int ipa_hints; |
| 59 | |
| 60 | /* Simple description of whether a memory load or a condition refers to a load |
| 61 | from an aggregate and if so, how and where from in the aggregate. |
| 62 | Individual fields have the same meaning like fields with the same name in |
| 63 | struct condition. */ |
| 64 | |
| 65 | struct agg_position_info |
| 66 | { |
| 67 | HOST_WIDE_INT offset; |
| 68 | bool agg_contents; |
| 69 | bool by_ref; |
| 70 | }; |
| 71 | |
| 72 | /* Representation of function body size and time depending on the call |
| 73 | context. We keep simple array of record, every containing of predicate |
| 74 | and time/size to account. */ |
| 75 | class size_time_entry |
| 76 | { |
| 77 | public: |
| 78 | /* Predicate for code to be executed. */ |
| 79 | ipa_predicate exec_predicate; |
| 80 | /* Predicate for value to be constant and optimized out in a specialized copy. |
| 81 | When deciding on specialization this makes it possible to see how much |
| 82 | the executed code paths will simplify. */ |
| 83 | ipa_predicate nonconst_predicate; |
| 84 | int size; |
| 85 | sreal time; |
| 86 | }; |
| 87 | |
| 88 | /* Summary about function and stack frame sizes. We keep this info |
| 89 | for inline clones and also for WPA streaming. For this reason this is not |
| 90 | part of ipa_fn_summary which exists only for offline functions. */ |
| 91 | class ipa_size_summary |
| 92 | { |
| 93 | public: |
| 94 | /* Estimated stack frame consumption by the function. */ |
| 95 | HOST_WIDE_INT estimated_self_stack_size; |
| 96 | /* Size of the function body. */ |
| 97 | int self_size; |
| 98 | /* Estimated size of the function after inlining. */ |
| 99 | int size; |
| 100 | |
| 101 | ipa_size_summary () |
| 102 | : estimated_self_stack_size (0), self_size (0), size (0) |
| 103 | { |
| 104 | } |
| 105 | }; |
| 106 | |
| 107 | /* Structure to capture how frequently some interesting events occur given a |
| 108 | particular predicate. The structure is used to estimate how often we |
| 109 | encounter loops with known iteration count or stride in various |
| 110 | contexts. */ |
| 111 | |
| 112 | struct GTY(()) ipa_freqcounting_predicate |
| 113 | { |
| 114 | /* The described event happens with this frequency... */ |
| 115 | sreal freq; |
| 116 | /* ...when this predicate evaluates to false. */ |
| 117 | ipa_predicate * GTY((skip)) predicate; |
| 118 | }; |
| 119 | |
| 120 | /* Function inlining information. */ |
| 121 | class GTY(()) ipa_fn_summary |
| 122 | { |
| 123 | public: |
| 124 | /* Keep all field empty so summary dumping works during its computation. |
| 125 | This is useful for debugging. */ |
| 126 | ipa_fn_summary () |
| 127 | : min_size (0), |
| 128 | inlinable (false), single_caller (false), |
| 129 | fp_expressions (false), safe_to_inline_to_always_inline (0), |
| 130 | target_info (0), estimated_stack_size (false), |
| 131 | time (0), conds (NULL), |
| 132 | size_time_table (), call_size_time_table (vNULL), |
| 133 | loop_iterations (NULL), loop_strides (NULL), |
| 134 | builtin_constant_p_parms (vNULL), |
| 135 | growth (0), scc_no (0) |
| 136 | { |
| 137 | } |
| 138 | |
| 139 | /* Copy constructor. */ |
| 140 | ipa_fn_summary (const ipa_fn_summary &s) |
| 141 | : min_size (s.min_size), |
| 142 | inlinable (s.inlinable), single_caller (s.single_caller), |
| 143 | fp_expressions (s.fp_expressions), |
| 144 | target_info (s.target_info), |
| 145 | estimated_stack_size (s.estimated_stack_size), |
| 146 | time (s.time), conds (s.conds), size_time_table (), |
| 147 | call_size_time_table (vNULL), |
| 148 | loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), |
| 149 | builtin_constant_p_parms (s.builtin_constant_p_parms), |
| 150 | growth (s.growth), scc_no (s.scc_no) |
| 151 | {} |
| 152 | |
| 153 | /* Default constructor. */ |
| 154 | ~ipa_fn_summary (); |
| 155 | |
| 156 | /* Information about the function body itself. */ |
| 157 | |
| 158 | /* Minimal size increase after inlining. */ |
| 159 | int min_size; |
| 160 | |
| 161 | /* False when there something makes inlining impossible (such as va_arg). */ |
| 162 | unsigned inlinable : 1; |
| 163 | /* True wen there is only one caller of the function before small function |
| 164 | inlining. */ |
| 165 | unsigned int single_caller : 1; |
| 166 | /* True if function contains any floating point expressions. */ |
| 167 | unsigned int fp_expressions : 1; |
| 168 | /* Cache for analysis of can_early_inline_edge_p. */ |
| 169 | unsigned int safe_to_inline_to_always_inline : 2; |
| 170 | /* Like fp_expressions field above, but it's to hold some target specific |
| 171 | information, such as some target specific isa flags. Note that for |
| 172 | offloading target compilers, this field isn't streamed. */ |
| 173 | unsigned int target_info; |
| 174 | |
| 175 | /* Information about function that will result after applying all the |
| 176 | inline decisions present in the callgraph. Generally kept up to |
| 177 | date only for functions that are not inline clones. */ |
| 178 | |
| 179 | /* Estimated stack frame consumption by the function. */ |
| 180 | HOST_WIDE_INT estimated_stack_size; |
| 181 | /* Estimated runtime of function after inlining. */ |
| 182 | sreal GTY((skip)) time; |
| 183 | |
| 184 | /* Conditional size/time information. The summaries are being |
| 185 | merged during inlining. */ |
| 186 | conditions conds; |
| 187 | /* Normal code is accounted in size_time_table, while calls are |
| 188 | accounted in call_size_time_table. This is because calls |
| 189 | are often adjusted by IPA optimizations and thus this summary |
| 190 | is generated from call summary information when needed. */ |
| 191 | auto_vec<size_time_entry> GTY((skip)) size_time_table; |
| 192 | /* Unlike size_time_table that is initialized for all summaries |
| 193 | call_size_time_table is allocated only for functions with |
| 194 | many calls. Use effecient vl_ptr storage. */ |
| 195 | vec<size_time_entry, va_heap, vl_ptr> GTY((skip)) call_size_time_table; |
| 196 | |
| 197 | /* Predicates on when some loops in the function can have known bounds. */ |
| 198 | vec<ipa_freqcounting_predicate, va_gc> *loop_iterations; |
| 199 | /* Predicates on when some loops in the function can have known strides. */ |
| 200 | vec<ipa_freqcounting_predicate, va_gc> *loop_strides; |
| 201 | /* Parameters tested by builtin_constant_p. */ |
| 202 | vec<int, va_heap, vl_ptr> GTY((skip)) builtin_constant_p_parms; |
| 203 | /* Estimated growth for inlining all copies of the function before start |
| 204 | of small functions inlining. |
| 205 | This value will get out of date as the callers are duplicated, but |
| 206 | using up-to-date value in the badness metric mean a lot of extra |
| 207 | expenses. */ |
| 208 | int growth; |
| 209 | /* Number of SCC on the beginning of inlining process. */ |
| 210 | int scc_no; |
| 211 | |
| 212 | /* Record time and size under given predicates. */ |
| 213 | void account_size_time (int, sreal, const ipa_predicate &, |
| 214 | const ipa_predicate &, |
| 215 | bool call = false); |
| 216 | |
| 217 | /* We keep values scaled up, so fractional sizes can be accounted. */ |
| 218 | static const int size_scale = 2; |
| 219 | /* Maximal size of size_time_table before we start to be conservative. */ |
| 220 | static const int max_size_time_table_size = 256; |
| 221 | }; |
| 222 | |
| 223 | class GTY((user)) ipa_fn_summary_t: |
| 224 | public fast_function_summary <ipa_fn_summary *, va_gc> |
| 225 | { |
| 226 | public: |
| 227 | ipa_fn_summary_t (symbol_table *symtab): |
| 228 | fast_function_summary <ipa_fn_summary *, va_gc> (symtab) {} |
| 229 | |
| 230 | static ipa_fn_summary_t *create_ggc (symbol_table *symtab) |
| 231 | { |
| 232 | class ipa_fn_summary_t *summary |
| 233 | = new (ggc_alloc_no_dtor<ipa_fn_summary_t> ()) ipa_fn_summary_t (symtab); |
| 234 | summary->disable_insertion_hook (); |
| 235 | return summary; |
| 236 | } |
| 237 | |
| 238 | /* Remove ipa_fn_summary for all callees of NODE. */ |
| 239 | void remove_callees (cgraph_node *node); |
| 240 | |
| 241 | void insert (cgraph_node *, ipa_fn_summary *) final override; |
| 242 | void remove (cgraph_node *node, ipa_fn_summary *) final override |
| 243 | { |
| 244 | remove_callees (node); |
| 245 | } |
| 246 | |
| 247 | void duplicate (cgraph_node *src, cgraph_node *dst, |
| 248 | ipa_fn_summary *src_data, ipa_fn_summary *dst_data) |
| 249 | final override; |
| 250 | }; |
| 251 | |
| 252 | extern GTY(()) fast_function_summary <ipa_fn_summary *, va_gc> |
| 253 | *ipa_fn_summaries; |
| 254 | |
| 255 | class ipa_size_summary_t: |
| 256 | public fast_function_summary <ipa_size_summary *, va_heap> |
| 257 | { |
| 258 | public: |
| 259 | ipa_size_summary_t (symbol_table *symtab): |
| 260 | fast_function_summary <ipa_size_summary *, va_heap> (symtab) |
| 261 | { |
| 262 | disable_insertion_hook (); |
| 263 | } |
| 264 | |
| 265 | void duplicate (cgraph_node *, cgraph_node *, |
| 266 | ipa_size_summary *src_data, |
| 267 | ipa_size_summary *dst_data) final override |
| 268 | { |
| 269 | *dst_data = *src_data; |
| 270 | } |
| 271 | }; |
| 272 | extern fast_function_summary <ipa_size_summary *, va_heap> |
| 273 | *ipa_size_summaries; |
| 274 | |
| 275 | /* Information kept about callgraph edges. */ |
| 276 | class ipa_call_summary |
| 277 | { |
| 278 | public: |
| 279 | /* Keep all field empty so summary dumping works during its computation. |
| 280 | This is useful for debugging. */ |
| 281 | ipa_call_summary () |
| 282 | : predicate (NULL), param (vNULL), call_stmt_size (0), call_stmt_time (0), |
| 283 | loop_depth (0), is_return_callee_uncaptured (false) |
| 284 | { |
| 285 | } |
| 286 | |
| 287 | /* Copy constructor. */ |
| 288 | ipa_call_summary (const ipa_call_summary &s): |
| 289 | predicate (s.predicate), param (s.param), call_stmt_size (s.call_stmt_size), |
| 290 | call_stmt_time (s.call_stmt_time), loop_depth (s.loop_depth), |
| 291 | is_return_callee_uncaptured (s.is_return_callee_uncaptured) |
| 292 | { |
| 293 | } |
| 294 | |
| 295 | /* Default destructor. */ |
| 296 | ~ipa_call_summary (); |
| 297 | |
| 298 | ipa_predicate *predicate; |
| 299 | /* Vector indexed by parameters. */ |
| 300 | vec<inline_param_summary> param; |
| 301 | /* Estimated size and time of the call statement. */ |
| 302 | int call_stmt_size; |
| 303 | int call_stmt_time; |
| 304 | /* Depth of loop nest, 0 means no nesting. */ |
| 305 | unsigned int loop_depth; |
| 306 | /* Indicates whether the caller returns the value of it's callee. */ |
| 307 | bool is_return_callee_uncaptured; |
| 308 | }; |
| 309 | |
| 310 | class ipa_call_summary_t: public fast_call_summary <ipa_call_summary *, va_heap> |
| 311 | { |
| 312 | public: |
| 313 | ipa_call_summary_t (symbol_table *symtab): |
| 314 | fast_call_summary <ipa_call_summary *, va_heap> (symtab) {} |
| 315 | |
| 316 | /* Hook that is called by summary when an edge is duplicated. */ |
| 317 | void duplicate (cgraph_edge *src, cgraph_edge *dst, |
| 318 | ipa_call_summary *src_data, |
| 319 | ipa_call_summary *dst_data) final override; |
| 320 | }; |
| 321 | |
| 322 | /* Estimated execution times, code sizes and other information about the |
| 323 | code executing a call described by ipa_call_context. */ |
| 324 | |
| 325 | struct ipa_call_estimates |
| 326 | { |
| 327 | /* Estimated size needed to execute call in the given context. */ |
| 328 | int size; |
| 329 | |
| 330 | /* Minimal size needed for the call that is + independent on the call context |
| 331 | and can be used for fast estimates. */ |
| 332 | int min_size; |
| 333 | |
| 334 | /* Estimated time needed to execute call in the given context. */ |
| 335 | sreal time; |
| 336 | |
| 337 | /* Estimated time needed to execute the function when not ignoring |
| 338 | computations known to be constant in this context. */ |
| 339 | sreal nonspecialized_time; |
| 340 | |
| 341 | /* Further discovered reasons why to inline or specialize the give calls. */ |
| 342 | ipa_hints hints; |
| 343 | |
| 344 | /* Frequency how often a loop with known number of iterations is encountered. |
| 345 | Calculated with hints. */ |
| 346 | sreal loops_with_known_iterations; |
| 347 | |
| 348 | /* Frequency how often a loop with known strides is encountered. Calculated |
| 349 | with hints. */ |
| 350 | sreal loops_with_known_strides; |
| 351 | }; |
| 352 | |
| 353 | class ipa_cached_call_context; |
| 354 | |
| 355 | /* This object describe a context of call. That is a summary of known |
| 356 | information about its parameters. Main purpose of this context is |
| 357 | to give more realistic estimations of function runtime, size and |
| 358 | inline hints. */ |
| 359 | class ipa_call_context |
| 360 | { |
| 361 | public: |
| 362 | ipa_call_context (cgraph_node *node, |
| 363 | clause_t possible_truths, |
| 364 | clause_t nonspec_possible_truths, |
| 365 | vec<inline_param_summary> inline_param_summary, |
| 366 | ipa_auto_call_arg_values *arg_values); |
| 367 | ipa_call_context () |
| 368 | : m_node(NULL) |
| 369 | { |
| 370 | } |
| 371 | void estimate_size_and_time (ipa_call_estimates *estimates, |
| 372 | bool est_times = true, bool est_hints = true); |
| 373 | bool equal_to (const ipa_call_context &); |
| 374 | bool exists_p () |
| 375 | { |
| 376 | return m_node != NULL; |
| 377 | } |
| 378 | private: |
| 379 | /* Called function. */ |
| 380 | cgraph_node *m_node; |
| 381 | /* Clause describing what predicate conditionals can be satisfied |
| 382 | in this context if function is inlined/specialized. */ |
| 383 | clause_t m_possible_truths; |
| 384 | /* Clause describing what predicate conditionals can be satisfied |
| 385 | in this context if function is kept offline. */ |
| 386 | clause_t m_nonspec_possible_truths; |
| 387 | /* Inline summary maintains info about change probabilities. */ |
| 388 | vec<inline_param_summary> m_inline_param_summary; |
| 389 | |
| 390 | /* Even after having calculated clauses, the information about argument |
| 391 | values is used to resolve indirect calls. */ |
| 392 | ipa_call_arg_values m_avals; |
| 393 | |
| 394 | friend ipa_cached_call_context; |
| 395 | }; |
| 396 | |
| 397 | /* Variant of ipa_call_context that is stored in a cache over a longer period |
| 398 | of time. */ |
| 399 | |
| 400 | class ipa_cached_call_context : public ipa_call_context |
| 401 | { |
| 402 | public: |
| 403 | void duplicate_from (const ipa_call_context &ctx); |
| 404 | void release (); |
| 405 | }; |
| 406 | |
| 407 | extern fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries; |
| 408 | |
| 409 | /* In ipa-fnsummary.cc */ |
| 410 | void ipa_debug_fn_summary (struct cgraph_node *); |
| 411 | void ipa_dump_fn_summaries (FILE *f); |
| 412 | void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node); |
| 413 | void ipa_dump_hints (FILE *f, ipa_hints); |
| 414 | void ipa_free_fn_summary (void); |
| 415 | void ipa_free_size_summary (void); |
| 416 | void inline_analyze_function (struct cgraph_node *node); |
| 417 | void estimate_ipcp_clone_size_and_time (struct cgraph_node *node, |
| 418 | ipa_auto_call_arg_values *avals, |
| 419 | ipa_call_estimates *estimates); |
| 420 | void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); |
| 421 | void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true); |
| 422 | void compute_fn_summary (struct cgraph_node *, bool); |
| 423 | bool refs_local_or_readonly_memory_p (tree); |
| 424 | bool points_to_local_or_readonly_memory_p (tree); |
| 425 | |
| 426 | |
| 427 | void evaluate_properties_for_edge (struct cgraph_edge *e, |
| 428 | bool inline_p, |
| 429 | clause_t *clause_ptr, |
| 430 | clause_t *nonspec_clause_ptr, |
| 431 | ipa_auto_call_arg_values *avals, |
| 432 | bool compute_contexts); |
| 433 | |
| 434 | void ipa_fnsummary_cc_finalize (void); |
| 435 | HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); |
| 436 | void ipa_remove_from_growth_caches (struct cgraph_edge *edge); |
| 437 | |
| 438 | /* Return true if EDGE is a cross module call. */ |
| 439 | |
| 440 | inline bool |
| 441 | cross_module_call_p (struct cgraph_edge *edge) |
| 442 | { |
| 443 | /* Here we do not want to walk to alias target becuase ICF may create |
| 444 | cross-unit aliases. */ |
| 445 | if (edge->caller->unit_id == edge->callee->unit_id) |
| 446 | return false; |
| 447 | /* If the call is to a (former) comdat function or s symbol with mutiple |
| 448 | extern inline definitions then treat is as in-module call. */ |
| 449 | if (edge->callee->merged_extern_inline || edge->callee->merged_comdat |
| 450 | || DECL_COMDAT (edge->callee->decl)) |
| 451 | return false; |
| 452 | return true; |
| 453 | } |
| 454 | |
| 455 | #endif /* GCC_IPA_FNSUMMARY_H */ |
| 456 | |