| 1 | /* Interprocedural constant propagation |
| 2 | Copyright (C) 2005-2025 Free Software Foundation, Inc. |
| 3 | |
| 4 | Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor |
| 5 | <mjambor@suse.cz> |
| 6 | |
| 7 | This file is part of GCC. |
| 8 | |
| 9 | GCC is free software; you can redistribute it and/or modify it under |
| 10 | the terms of the GNU General Public License as published by the Free |
| 11 | Software Foundation; either version 3, or (at your option) any later |
| 12 | version. |
| 13 | |
| 14 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| 15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 17 | for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with GCC; see the file COPYING3. If not see |
| 21 | <http://www.gnu.org/licenses/>. */ |
| 22 | |
| 23 | /* Interprocedural constant propagation (IPA-CP). |
| 24 | |
| 25 | The goal of this transformation is to |
| 26 | |
1) discover functions which are always invoked with some arguments having the
same known constant values and modify the functions so that subsequent
optimizations can take advantage of this knowledge, and

2) partial specialization - create specialized versions of functions
transformed in this way if some parameters are known constants only in
certain contexts and the estimated tradeoff between speedup and code size
cost is deemed good, as the example below illustrates.
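
As a minimal illustrative sketch of the effect (the names below are made up
for this comment only, they do not come from GCC itself), consider:

  static int mul (int x, int y) { return x * y; }
  int f (int a) { return mul (a, 7); }
  int g (int a, int b) { return mul (a, b); }

The call in f always passes the constant 7 as Y, so IPA-CP may create a
specialized clone of mul in which Y is known to be 7 and redirect the call in
f to it, while the call in g keeps using the original body.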
| 35 | |
| 36 | The algorithm also propagates types and attempts to perform type based |
| 37 | devirtualization. Types are propagated much like constants. |
| 38 | |
The algorithm basically consists of three stages. In the first, functions
are analyzed one at a time and jump functions are constructed for all known
call-sites. In the second stage, the pass propagates information from the
jump functions across the call graph to reveal what values are available at
which call sites, estimates the effects of the known values on the functions
and their callees, and finally decides what specialized extra versions should
be created. In the third, the specialized versions are materialized and the
appropriate calls are redirected.
| 47 | |
| 48 | The algorithm used is to a certain extent based on "Interprocedural Constant |
| 49 | Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, |
| 50 | Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D |
| 51 | Cooper, Mary W. Hall, and Ken Kennedy. |
| 52 | |
| 53 | |
| 54 | First stage - intraprocedural analysis |
| 55 | ======================================= |
| 56 | |
| 57 | This phase computes jump_function and modification flags. |
| 58 | |
A jump function for a call-site represents the values passed as actual
arguments of a given call-site. In principle, there are three types of
values:

Pass through - the caller's formal parameter is passed as an actual
argument, possibly with an operation performed on it.
Constant - a constant is passed as an actual argument.
Unknown - neither of the above.
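
For instance, in a caller like the following (purely an illustrative sketch,
the exact classification depends on the IL that is actually produced)

  extern int n;
  extern void callee (int, int, int);
  void caller (int i) { callee (i + 1, 16, n); }

the first argument gets a pass-through jump function for parameter I with a
PLUS_EXPR adding 1, the second argument gets a constant jump function holding
16, and the third one, loaded from a global variable, is unknown.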
| 67 | |
| 68 | All jump function types are described in detail in ipa-prop.h, together with |
| 69 | the data structures that represent them and methods of accessing them. |
| 70 | |
| 71 | ipcp_generate_summary() is the main function of the first stage. |
| 72 | |
| 73 | Second stage - interprocedural analysis |
| 74 | ======================================== |
| 75 | |
This stage is itself divided into two phases. In the first, we propagate
known values over the call graph; in the second, we make cloning decisions.
It uses a different algorithm than the one in the original paper by Callahan
et al.
| 79 | |
| 80 | First, we traverse the functions topologically from callers to callees and, |
| 81 | for each strongly connected component (SCC), we propagate constants |
| 82 | according to previously computed jump functions. We also record what known |
| 83 | values depend on other known values and estimate local effects. Finally, we |
| 84 | propagate cumulative information about these effects from dependent values |
| 85 | to those on which they depend. |
| 86 | |
| 87 | Second, we again traverse the call graph in the same topological order and |
| 88 | make clones for functions which we know are called with the same values in |
| 89 | all contexts and decide about extra specialized clones of functions just for |
| 90 | some contexts - these decisions are based on both local estimates and |
| 91 | cumulative estimates propagated from callees. |
| 92 | |
ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
second stage.
| 95 | |
Third stage - materialization of clones, call statement updates
================================================================
| 98 | |
| 99 | This stage is currently performed by call graph code (mainly in cgraphunit.cc |
and tree-inline.cc) according to instructions inserted into the call graph by
| 101 | the second stage. */ |
| 102 | |
| 103 | #define INCLUDE_ALGORITHM |
| 104 | #include "config.h" |
| 105 | #include "system.h" |
| 106 | #include "coretypes.h" |
| 107 | #include "backend.h" |
| 108 | #include "tree.h" |
| 109 | #include "gimple-expr.h" |
| 110 | #include "gimple.h" |
| 111 | #include "predict.h" |
| 112 | #include "sreal.h" |
| 113 | #include "alloc-pool.h" |
| 114 | #include "tree-pass.h" |
| 115 | #include "cgraph.h" |
| 116 | #include "diagnostic.h" |
| 117 | #include "fold-const.h" |
| 118 | #include "gimple-iterator.h" |
| 119 | #include "gimple-fold.h" |
| 120 | #include "symbol-summary.h" |
| 121 | #include "tree-vrp.h" |
| 122 | #include "ipa-cp.h" |
| 123 | #include "ipa-prop.h" |
| 124 | #include "tree-pretty-print.h" |
| 125 | #include "tree-inline.h" |
| 126 | #include "ipa-fnsummary.h" |
| 127 | #include "ipa-utils.h" |
| 128 | #include "tree-ssa-ccp.h" |
| 129 | #include "stringpool.h" |
| 130 | #include "attribs.h" |
| 131 | #include "dbgcnt.h" |
| 132 | #include "symtab-clones.h" |
| 133 | #include "gimple-range.h" |
| 134 | |
| 135 | |
| 136 | /* Allocation pools for values and their sources in ipa-cp. */ |
| 137 | |
| 138 | object_allocator<ipcp_value<tree> > ipcp_cst_values_pool |
| 139 | ("IPA-CP constant values" ); |
| 140 | |
| 141 | object_allocator<ipcp_value<ipa_polymorphic_call_context> > |
| 142 | ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts" ); |
| 143 | |
| 144 | object_allocator<ipcp_value_source<tree> > ipcp_sources_pool |
| 145 | ("IPA-CP value sources" ); |
| 146 | |
| 147 | object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool |
| 148 | ("IPA_CP aggregate lattices" ); |
| 149 | |
| 150 | /* Original overall size of the program. */ |
| 151 | |
| 152 | static long overall_size, orig_overall_size; |
| 153 | |
| 154 | /* Node name to unique clone suffix number map. */ |
| 155 | static hash_map<const char *, unsigned> *clone_num_suffixes; |
| 156 | |
| 157 | /* Return the param lattices structure corresponding to the Ith formal |
| 158 | parameter of the function described by INFO. */ |
| 159 | static inline class ipcp_param_lattices * |
| 160 | ipa_get_parm_lattices (class ipa_node_params *info, int i) |
| 161 | { |
| 162 | gcc_assert (i >= 0 && i < ipa_get_param_count (info)); |
| 163 | gcc_checking_assert (!info->ipcp_orig_node); |
| 164 | return &(info->lattices[i]); |
| 165 | } |
| 166 | |
| 167 | /* Return the lattice corresponding to the scalar value of the Ith formal |
| 168 | parameter of the function described by INFO. */ |
| 169 | static inline ipcp_lattice<tree> * |
| 170 | ipa_get_scalar_lat (class ipa_node_params *info, int i) |
| 171 | { |
| 172 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 173 | return &plats->itself; |
| 174 | } |
| 175 | |
| 176 | /* Return the lattice corresponding to the scalar value of the Ith formal |
| 177 | parameter of the function described by INFO. */ |
| 178 | static inline ipcp_lattice<ipa_polymorphic_call_context> * |
| 179 | ipa_get_poly_ctx_lat (class ipa_node_params *info, int i) |
| 180 | { |
| 181 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 182 | return &plats->ctxlat; |
| 183 | } |
| 184 | |
| 185 | /* Return whether LAT is a lattice with a single constant and without an |
| 186 | undefined value. */ |
| 187 | |
| 188 | template <typename valtype> |
| 189 | inline bool |
| 190 | ipcp_lattice<valtype>::is_single_const () |
| 191 | { |
| 192 | if (bottom || contains_variable || values_count != 1) |
| 193 | return false; |
| 194 | else |
| 195 | return true; |
| 196 | } |
| 197 | |
| 198 | /* Return true iff X and Y should be considered equal values by IPA-CP. */ |
| 199 | |
| 200 | bool |
| 201 | values_equal_for_ipcp_p (tree x, tree y) |
| 202 | { |
| 203 | gcc_checking_assert (x != NULL_TREE && y != NULL_TREE); |
| 204 | |
| 205 | if (x == y) |
| 206 | return true; |
| 207 | |
| 208 | if (TREE_CODE (x) == ADDR_EXPR |
| 209 | && TREE_CODE (y) == ADDR_EXPR |
| 210 | && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL |
| 211 | || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL |
| 212 | && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0)))) |
| 213 | && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL |
| 214 | || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL |
| 215 | && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0))))) |
| 216 | return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0) |
| 217 | || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)), |
| 218 | DECL_INITIAL (TREE_OPERAND (y, 0)), flags: 0); |
| 219 | else |
| 220 | return operand_equal_p (x, y, flags: 0); |
| 221 | } |
| 222 | |
| 223 | /* Print V which is extracted from a value in a lattice to F. This overloaded |
| 224 | function is used to print tree constants. */ |
| 225 | |
| 226 | static void |
| 227 | print_ipcp_constant_value (FILE * f, tree v) |
| 228 | { |
| 229 | ipa_print_constant_value (f, val: v); |
| 230 | } |
| 231 | |
| 232 | /* Print V which is extracted from a value in a lattice to F. This overloaded |
| 233 | function is used to print constant polymorphic call contexts. */ |
| 234 | |
| 235 | static void |
| 236 | print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v) |
| 237 | { |
| 238 | v.dump(f, newline: false); |
| 239 | } |
| 240 | |
| 241 | /* Print a lattice LAT to F. */ |
| 242 | |
| 243 | template <typename valtype> |
| 244 | void |
| 245 | ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits) |
| 246 | { |
| 247 | ipcp_value<valtype> *val; |
| 248 | bool prev = false; |
| 249 | |
| 250 | if (bottom) |
| 251 | { |
| 252 | fprintf (stream: f, format: "BOTTOM\n" ); |
| 253 | return; |
| 254 | } |
| 255 | |
| 256 | if (!values_count && !contains_variable) |
| 257 | { |
| 258 | fprintf (stream: f, format: "TOP\n" ); |
| 259 | return; |
| 260 | } |
| 261 | |
| 262 | if (contains_variable) |
| 263 | { |
| 264 | fprintf (stream: f, format: "VARIABLE" ); |
| 265 | prev = true; |
| 266 | if (dump_benefits) |
| 267 | fprintf (stream: f, format: "\n" ); |
| 268 | } |
| 269 | |
| 270 | for (val = values; val; val = val->next) |
| 271 | { |
| 272 | if (dump_benefits && prev) |
| 273 | fprintf (stream: f, format: " " ); |
| 274 | else if (!dump_benefits && prev) |
| 275 | fprintf (stream: f, format: ", " ); |
| 276 | else |
| 277 | prev = true; |
| 278 | |
| 279 | print_ipcp_constant_value (f, val->value); |
| 280 | |
| 281 | if (dump_sources) |
| 282 | { |
| 283 | ipcp_value_source<valtype> *s; |
| 284 | |
| 285 | if (val->self_recursion_generated_p ()) |
| 286 | fprintf (f, " [self_gen(%i), from:" , |
| 287 | val->self_recursion_generated_level); |
| 288 | else |
| 289 | fprintf (f, " [scc: %i, from:" , val->scc_no); |
| 290 | for (s = val->sources; s; s = s->next) |
| 291 | fprintf (f, " %i(%f)" , s->cs->caller->get_uid (), |
| 292 | s->cs->sreal_frequency ().to_double ()); |
| 293 | fprintf (stream: f, format: "]" ); |
| 294 | } |
| 295 | |
| 296 | if (dump_benefits) |
| 297 | fprintf (f, " [loc_time: %g, loc_size: %i, " |
| 298 | "prop_time: %g, prop_size: %i]\n" , |
| 299 | val->local_time_benefit.to_double (), val->local_size_cost, |
| 300 | val->prop_time_benefit.to_double (), val->prop_size_cost); |
| 301 | } |
| 302 | if (!dump_benefits) |
| 303 | fprintf (stream: f, format: "\n" ); |
| 304 | } |
| 305 | |
| 306 | /* Print VALUE to F in a form which in usual cases does not take thousands of |
| 307 | characters. */ |
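/* For example, the widest_int for -2 has all upper bits set, so the function
   below prints it as "0xf..fe" instead of printing dozens of 'f' hex
   digits.  */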
| 308 | |
| 309 | static void |
| 310 | ipcp_print_widest_int (FILE *f, const widest_int &value) |
| 311 | { |
| 312 | if (value == -1) |
| 313 | fprintf (stream: f, format: "-1" ); |
| 314 | else if (wi::arshift (x: value, y: 128) == -1) |
| 315 | { |
| 316 | char buf[35], *p = buf + 2; |
| 317 | widest_int v = wi::zext (x: value, offset: 128); |
| 318 | size_t len; |
| 319 | print_hex (wi: v, buf); |
| 320 | len = strlen (s: p); |
| 321 | if (len == 32) |
| 322 | { |
| 323 | fprintf (stream: f, format: "0xf..f" ); |
| 324 | while (*p == 'f') |
| 325 | ++p; |
| 326 | } |
| 327 | else |
| 328 | fprintf (stream: f, format: "0xf..f%0*d" , (int) (32 - len), 0); |
| 329 | fputs (s: p, stream: f); |
| 330 | } |
| 331 | else |
| 332 | print_hex (wi: value, file: f); |
| 333 | } |
| 334 | |
| 335 | void |
| 336 | ipcp_bits_lattice::print (FILE *f) |
| 337 | { |
| 338 | if (top_p ()) |
| 339 | fprintf (stream: f, format: " Bits unknown (TOP)\n" ); |
| 340 | else if (bottom_p ()) |
| 341 | fprintf (stream: f, format: " Bits unusable (BOTTOM)\n" ); |
| 342 | else |
| 343 | { |
| 344 | fprintf (stream: f, format: " Bits: value = " ); |
| 345 | ipcp_print_widest_int (f, value: get_value ()); |
| 346 | fprintf (stream: f, format: ", mask = " ); |
| 347 | ipcp_print_widest_int (f, value: get_mask ()); |
| 348 | fprintf (stream: f, format: "\n" ); |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | /* Print value range lattice to F. */ |
| 353 | |
| 354 | void |
| 355 | ipcp_vr_lattice::print (FILE * f) |
| 356 | { |
| 357 | m_vr.dump (f); |
| 358 | } |
| 359 | |
| 360 | /* Print all ipcp_lattices of all functions to F. */ |
| 361 | |
| 362 | static void |
| 363 | print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits) |
| 364 | { |
| 365 | struct cgraph_node *node; |
| 366 | int i, count; |
| 367 | |
| 368 | fprintf (stream: f, format: "\nLattices:\n" ); |
| 369 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| 370 | { |
| 371 | class ipa_node_params *info; |
| 372 | |
| 373 | info = ipa_node_params_sum->get (node); |
| 374 | /* Skip unoptimized functions and constprop clones since we don't make |
| 375 | lattices for them. */ |
| 376 | if (!info || info->ipcp_orig_node) |
| 377 | continue; |
| 378 | fprintf (stream: f, format: " Node: %s:\n" , node->dump_name ()); |
| 379 | count = ipa_get_param_count (info); |
| 380 | for (i = 0; i < count; i++) |
| 381 | { |
| 382 | struct ipcp_agg_lattice *aglat; |
| 383 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 384 | fprintf (stream: f, format: " param [%d]: " , i); |
| 385 | plats->itself.print (f, dump_sources, dump_benefits); |
| 386 | fprintf (stream: f, format: " ctxs: " ); |
| 387 | plats->ctxlat.print (f, dump_sources, dump_benefits); |
| 388 | plats->bits_lattice.print (f); |
| 389 | fprintf (stream: f, format: " " ); |
| 390 | plats->m_value_range.print (f); |
| 391 | fprintf (stream: f, format: "\n" ); |
| 392 | if (plats->virt_call) |
| 393 | fprintf (stream: f, format: " virt_call flag set\n" ); |
| 394 | |
| 395 | if (plats->aggs_bottom) |
| 396 | { |
| 397 | fprintf (stream: f, format: " AGGS BOTTOM\n" ); |
| 398 | continue; |
| 399 | } |
| 400 | if (plats->aggs_contain_variable) |
| 401 | fprintf (stream: f, format: " AGGS VARIABLE\n" ); |
| 402 | for (aglat = plats->aggs; aglat; aglat = aglat->next) |
| 403 | { |
| 404 | fprintf (stream: f, format: " %soffset " HOST_WIDE_INT_PRINT_DEC ": " , |
| 405 | plats->aggs_by_ref ? "ref " : "" , aglat->offset); |
| 406 | aglat->print (f, dump_sources, dump_benefits); |
| 407 | } |
| 408 | } |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | /* Determine whether it is at all technically possible to create clones of NODE |
| 413 | and store this information in the ipa_node_params structure associated |
| 414 | with NODE. */ |
| 415 | |
| 416 | static void |
| 417 | determine_versionability (struct cgraph_node *node, |
| 418 | class ipa_node_params *info) |
| 419 | { |
| 420 | const char *reason = NULL; |
| 421 | |
| 422 | /* There are a number of generic reasons functions cannot be versioned. We |
| 423 | also cannot remove parameters if there are type attributes such as fnspec |
| 424 | present. */ |
| 425 | if (node->alias || node->thunk) |
| 426 | reason = "alias or thunk" ; |
| 427 | else if (!node->versionable) |
| 428 | reason = "not a tree_versionable_function" ; |
| 429 | else if (node->get_availability () <= AVAIL_INTERPOSABLE) |
| 430 | reason = "insufficient body availability" ; |
| 431 | else if (!opt_for_fn (node->decl, optimize) |
| 432 | || !opt_for_fn (node->decl, flag_ipa_cp)) |
| 433 | reason = "non-optimized function" ; |
| 434 | else if (lookup_attribute (attr_name: "omp declare simd" , DECL_ATTRIBUTES (node->decl))) |
| 435 | { |
| 436 | /* Ideally we should clone the SIMD clones themselves and create |
| 437 | vector copies of them, so IPA-cp and SIMD clones can happily |
| 438 | coexist, but that may not be worth the effort. */ |
| 439 | reason = "function has SIMD clones" ; |
| 440 | } |
| 441 | else if (lookup_attribute (attr_name: "target_clones" , DECL_ATTRIBUTES (node->decl))) |
| 442 | { |
| 443 | /* Ideally we should clone the target clones themselves and create |
| 444 | copies of them, so IPA-cp and target clones can happily |
| 445 | coexist, but that may not be worth the effort. */ |
| 446 | reason = "function target_clones attribute" ; |
| 447 | } |
/* Don't clone decls local to a comdat group; it breaks, and for C++
decloned constructors inlining is always better anyway. */
| 450 | else if (node->comdat_local_p ()) |
| 451 | reason = "comdat-local function" ; |
| 452 | else if (node->calls_comdat_local) |
| 453 | { |
| 454 | /* TODO: call is versionable if we make sure that all |
| 455 | callers are inside of a comdat group. */ |
| 456 | reason = "calls comdat-local function" ; |
| 457 | } |
| 458 | |
| 459 | /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN |
| 460 | work only when inlined. Cloning them may still lead to better code |
| 461 | because ipa-cp will not give up on cloning further. If the function is |
external, however, this leads to wrong code because we may end up producing
an offline copy of the function. */
| 464 | if (DECL_EXTERNAL (node->decl)) |
| 465 | for (cgraph_edge *edge = node->callees; !reason && edge; |
| 466 | edge = edge->next_callee) |
| 467 | if (fndecl_built_in_p (node: edge->callee->decl, klass: BUILT_IN_NORMAL)) |
| 468 | { |
| 469 | if (DECL_FUNCTION_CODE (decl: edge->callee->decl) == BUILT_IN_VA_ARG_PACK) |
| 470 | reason = "external function which calls va_arg_pack" ; |
| 471 | if (DECL_FUNCTION_CODE (decl: edge->callee->decl) |
| 472 | == BUILT_IN_VA_ARG_PACK_LEN) |
| 473 | reason = "external function which calls va_arg_pack_len" ; |
| 474 | } |
| 475 | |
| 476 | if (reason && dump_file && !node->alias && !node->thunk) |
| 477 | fprintf (stream: dump_file, format: "Function %s is not versionable, reason: %s.\n" , |
| 478 | node->dump_name (), reason); |
| 479 | |
| 480 | info->versionable = (reason == NULL); |
| 481 | } |
| 482 | |
| 483 | /* Return true if it is at all technically possible to create clones of a |
| 484 | NODE. */ |
| 485 | |
| 486 | static bool |
| 487 | ipcp_versionable_function_p (struct cgraph_node *node) |
| 488 | { |
| 489 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 490 | return info && info->versionable; |
| 491 | } |
| 492 | |
| 493 | /* Structure holding accumulated information about callers of a node. */ |
| 494 | |
| 495 | struct caller_statistics |
| 496 | { |
| 497 | /* If requested (see below), self-recursive call counts are summed into this |
| 498 | field. */ |
| 499 | profile_count rec_count_sum; |
| 500 | /* The sum of all ipa counts of all the other (non-recursive) calls. */ |
| 501 | profile_count count_sum; |
| 502 | /* Sum of all frequencies for all calls. */ |
| 503 | sreal freq_sum; |
| 504 | /* Number of calls and calls considered interesting respectively. */ |
| 505 | int n_calls, n_interesting_calls; |
| 506 | /* If itself is set up, also count the number of non-self-recursive |
| 507 | calls. */ |
| 508 | int n_nonrec_calls; |
| 509 | /* If non-NULL, this is the node itself and calls from it should have their |
| 510 | counts included in rec_count_sum and not count_sum. */ |
| 511 | cgraph_node *itself; |
| 512 | /* True if there is a caller that has no IPA profile. */ |
| 513 | bool called_without_ipa_profile; |
| 514 | }; |
| 515 | |
/* Initialize fields of STATS to zeroes and optionally set it up so that
self-recursive edges from ITSELF have their counts accumulated in
rec_count_sum rather than in count_sum. */
| 518 | |
| 519 | static inline void |
| 520 | init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL) |
| 521 | { |
| 522 | stats->rec_count_sum = profile_count::zero (); |
| 523 | stats->count_sum = profile_count::zero (); |
| 524 | stats->n_calls = 0; |
| 525 | stats->n_interesting_calls = 0; |
| 526 | stats->n_nonrec_calls = 0; |
| 527 | stats->freq_sum = 0; |
| 528 | stats->itself = itself; |
| 529 | stats->called_without_ipa_profile = false; |
| 530 | } |
| 531 | |
/* We want to propagate across edges that may be executed, however
we do not want to check maybe_hot, since the call itself may be cold
while the callee contains some heavy loop which still makes propagation
relevant.

In particular, even an edge called only once may lead to a significant
improvement. */
| 539 | |
| 540 | static bool |
| 541 | cs_interesting_for_ipcp_p (cgraph_edge *e) |
| 542 | { |
| 543 | /* If profile says the edge is executed, we want to optimize. */ |
| 544 | if (e->count.ipa ().nonzero_p ()) |
| 545 | return true; |
/* If the local (possibly guessed or adjusted zero) profile claims the edge
is not executed, do not propagate.
Do not trust AFDO, since a branch needs to be executed multiple times
to be counted, while we want to propagate even a call executed only once
during the training run if the callee is important. */
| 551 | if (e->count.initialized_p () && !e->count.nonzero_p () |
| 552 | && e->count.quality () != AFDO) |
| 553 | return false; |
| 554 | /* If we have zero IPA profile, still consider edge for cloning |
| 555 | in case we do partial training. */ |
| 556 | if (e->count.ipa ().initialized_p () |
| 557 | && !opt_for_fn (e->callee->decl,flag_profile_partial_training)) |
| 558 | return false; |
| 559 | return true; |
| 560 | } |
| 561 | |
| 562 | /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of |
| 563 | non-thunk incoming edges to NODE. */ |
| 564 | |
| 565 | static bool |
| 566 | gather_caller_stats (struct cgraph_node *node, void *data) |
| 567 | { |
| 568 | struct caller_statistics *stats = (struct caller_statistics *) data; |
| 569 | struct cgraph_edge *cs; |
| 570 | |
| 571 | for (cs = node->callers; cs; cs = cs->next_caller) |
| 572 | if (!cs->caller->thunk) |
| 573 | { |
| 574 | ipa_node_params *info = ipa_node_params_sum->get (node: cs->caller); |
| 575 | if (info && info->node_dead) |
| 576 | continue; |
| 577 | |
| 578 | if (cs->count.ipa ().initialized_p ()) |
| 579 | { |
| 580 | if (stats->itself && stats->itself == cs->caller) |
| 581 | stats->rec_count_sum += cs->count.ipa (); |
| 582 | else |
| 583 | stats->count_sum += cs->count.ipa (); |
| 584 | } |
| 585 | else |
| 586 | stats->called_without_ipa_profile = true; |
| 587 | stats->freq_sum += cs->sreal_frequency (); |
| 588 | stats->n_calls++; |
| 589 | if (stats->itself && stats->itself != cs->caller) |
| 590 | stats->n_nonrec_calls++; |
| 591 | |
| 592 | /* If profile known to be zero, we do not want to clone for performance. |
| 593 | However if call is cold, the called function may still contain |
| 594 | important hot loops. */ |
| 595 | if (cs_interesting_for_ipcp_p (e: cs)) |
| 596 | stats->n_interesting_calls++; |
| 597 | } |
| 598 | return false; |
| 599 | |
| 600 | } |
| 601 | |
/* Return true if this NODE is a viable candidate for cloning. */
| 603 | |
| 604 | static bool |
| 605 | ipcp_cloning_candidate_p (struct cgraph_node *node) |
| 606 | { |
| 607 | struct caller_statistics stats; |
| 608 | |
| 609 | gcc_checking_assert (node->has_gimple_body_p ()); |
| 610 | |
| 611 | if (!opt_for_fn (node->decl, flag_ipa_cp_clone)) |
| 612 | { |
| 613 | if (dump_file) |
| 614 | fprintf (stream: dump_file, format: "Not considering %s for cloning; " |
| 615 | "-fipa-cp-clone disabled.\n" , |
| 616 | node->dump_name ()); |
| 617 | return false; |
| 618 | } |
| 619 | |
| 620 | if (node->optimize_for_size_p ()) |
| 621 | { |
| 622 | if (dump_file) |
| 623 | fprintf (stream: dump_file, format: "Not considering %s for cloning; " |
| 624 | "optimizing it for size.\n" , |
| 625 | node->dump_name ()); |
| 626 | return false; |
| 627 | } |
| 628 | |
| 629 | init_caller_stats (stats: &stats); |
| 630 | node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats, include_overwritable: false); |
| 631 | |
| 632 | if (ipa_size_summaries->get (node)->self_size < stats.n_calls) |
| 633 | { |
| 634 | if (dump_file) |
| 635 | fprintf (stream: dump_file, format: "Considering %s for cloning; code might shrink.\n" , |
| 636 | node->dump_name ()); |
| 637 | return true; |
| 638 | } |
| 639 | if (!stats.n_interesting_calls) |
| 640 | { |
| 641 | if (dump_file) |
| 642 | fprintf (stream: dump_file, format: "Not considering %s for cloning; " |
| 643 | "no calls considered interesting by profile.\n" , |
| 644 | node->dump_name ()); |
| 645 | return false; |
| 646 | } |
| 647 | if (dump_file) |
| 648 | fprintf (stream: dump_file, format: "Considering %s for cloning.\n" , |
| 649 | node->dump_name ()); |
| 650 | return true; |
| 651 | } |
| 652 | |
| 653 | template <typename valtype> |
| 654 | class value_topo_info |
| 655 | { |
| 656 | public: |
| 657 | /* Head of the linked list of topologically sorted values. */ |
| 658 | ipcp_value<valtype> *values_topo; |
| 659 | /* Stack for creating SCCs, represented by a linked list too. */ |
| 660 | ipcp_value<valtype> *stack; |
| 661 | /* Counter driving the algorithm in add_val_to_toposort. */ |
| 662 | int dfs_counter; |
| 663 | |
| 664 | value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0) |
| 665 | {} |
| 666 | void add_val (ipcp_value<valtype> *cur_val); |
| 667 | void propagate_effects (); |
| 668 | }; |
| 669 | |
| 670 | /* Arrays representing a topological ordering of call graph nodes and a stack |
| 671 | of nodes used during constant propagation and also data required to perform |
| 672 | topological sort of values and propagation of benefits in the determined |
| 673 | order. */ |
| 674 | |
| 675 | class ipa_topo_info |
| 676 | { |
| 677 | public: |
| 678 | /* Array with obtained topological order of cgraph nodes. */ |
| 679 | struct cgraph_node **order; |
| 680 | /* Stack of cgraph nodes used during propagation within SCC until all values |
| 681 | in the SCC stabilize. */ |
| 682 | struct cgraph_node **stack; |
| 683 | int nnodes, stack_top; |
| 684 | |
| 685 | value_topo_info<tree> constants; |
| 686 | value_topo_info<ipa_polymorphic_call_context> contexts; |
| 687 | |
| 688 | ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0), |
| 689 | constants () |
| 690 | {} |
| 691 | }; |
| 692 | |
| 693 | /* Skip edges from and to nodes without ipa_cp enabled. |
| 694 | Ignore not available symbols. */ |
| 695 | |
| 696 | static bool |
| 697 | ignore_edge_p (cgraph_edge *e) |
| 698 | { |
| 699 | enum availability avail; |
| 700 | cgraph_node *ultimate_target |
| 701 | = e->callee->function_or_virtual_thunk_symbol (avail: &avail, ref: e->caller); |
| 702 | |
| 703 | return (avail <= AVAIL_INTERPOSABLE |
| 704 | || !opt_for_fn (ultimate_target->decl, optimize) |
| 705 | || !opt_for_fn (ultimate_target->decl, flag_ipa_cp)); |
| 706 | } |
| 707 | |
| 708 | /* Allocate the arrays in TOPO and topologically sort the nodes into order. */ |
| 709 | |
| 710 | static void |
| 711 | build_toporder_info (class ipa_topo_info *topo) |
| 712 | { |
| 713 | topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
| 714 | topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
| 715 | |
| 716 | gcc_checking_assert (topo->stack_top == 0); |
| 717 | topo->nnodes = ipa_reduced_postorder (topo->order, true, |
| 718 | ignore_edge: ignore_edge_p); |
| 719 | } |
| 720 | |
| 721 | /* Free information about strongly connected components and the arrays in |
| 722 | TOPO. */ |
| 723 | |
| 724 | static void |
| 725 | free_toporder_info (class ipa_topo_info *topo) |
| 726 | { |
| 727 | ipa_free_postorder_info (); |
| 728 | free (ptr: topo->order); |
| 729 | free (ptr: topo->stack); |
| 730 | } |
| 731 | |
| 732 | /* Add NODE to the stack in TOPO, unless it is already there. */ |
| 733 | |
| 734 | static inline void |
| 735 | push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node) |
| 736 | { |
| 737 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 738 | if (info->node_enqueued) |
| 739 | return; |
| 740 | info->node_enqueued = 1; |
| 741 | topo->stack[topo->stack_top++] = node; |
| 742 | } |
| 743 | |
| 744 | /* Pop a node from the stack in TOPO and return it or return NULL if the stack |
| 745 | is empty. */ |
| 746 | |
| 747 | static struct cgraph_node * |
| 748 | pop_node_from_stack (class ipa_topo_info *topo) |
| 749 | { |
| 750 | if (topo->stack_top) |
| 751 | { |
| 752 | struct cgraph_node *node; |
| 753 | topo->stack_top--; |
| 754 | node = topo->stack[topo->stack_top]; |
| 755 | ipa_node_params_sum->get (node)->node_enqueued = 0; |
| 756 | return node; |
| 757 | } |
| 758 | else |
| 759 | return NULL; |
| 760 | } |
| 761 | |
| 762 | /* Set lattice LAT to bottom and return true if it previously was not set as |
| 763 | such. */ |
| 764 | |
| 765 | template <typename valtype> |
| 766 | inline bool |
| 767 | ipcp_lattice<valtype>::set_to_bottom () |
| 768 | { |
| 769 | bool ret = !bottom; |
| 770 | bottom = true; |
| 771 | return ret; |
| 772 | } |
| 773 | |
| 774 | /* Mark lattice as containing an unknown value and return true if it previously |
| 775 | was not marked as such. */ |
| 776 | |
| 777 | template <typename valtype> |
| 778 | inline bool |
| 779 | ipcp_lattice<valtype>::set_contains_variable () |
| 780 | { |
| 781 | bool ret = !contains_variable; |
| 782 | contains_variable = true; |
| 783 | return ret; |
| 784 | } |
| 785 | |
| 786 | /* Set all aggregate lattices in PLATS to bottom and return true if they were |
| 787 | not previously set as such. */ |
| 788 | |
| 789 | static inline bool |
| 790 | set_agg_lats_to_bottom (class ipcp_param_lattices *plats) |
| 791 | { |
| 792 | bool ret = !plats->aggs_bottom; |
| 793 | plats->aggs_bottom = true; |
| 794 | return ret; |
| 795 | } |
| 796 | |
| 797 | /* Mark all aggregate lattices in PLATS as containing an unknown value and |
| 798 | return true if they were not previously marked as such. */ |
| 799 | |
| 800 | static inline bool |
| 801 | set_agg_lats_contain_variable (class ipcp_param_lattices *plats) |
| 802 | { |
| 803 | bool ret = !plats->aggs_contain_variable; |
| 804 | plats->aggs_contain_variable = true; |
| 805 | return ret; |
| 806 | } |
| 807 | |
| 808 | bool |
| 809 | ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other) |
| 810 | { |
| 811 | return meet_with_1 (other_vr: other.m_vr); |
| 812 | } |
| 813 | |
| 814 | /* Meet the current value of the lattice with the range described by |
| 815 | P_VR. */ |
| 816 | |
| 817 | bool |
| 818 | ipcp_vr_lattice::meet_with (const vrange &p_vr) |
| 819 | { |
| 820 | return meet_with_1 (other_vr: p_vr); |
| 821 | } |
| 822 | |
| 823 | /* Meet the current value of the lattice with the range described by |
| 824 | OTHER_VR. Return TRUE if anything changed. */ |
| 825 | |
| 826 | bool |
| 827 | ipcp_vr_lattice::meet_with_1 (const vrange &other_vr) |
| 828 | { |
| 829 | if (bottom_p ()) |
| 830 | return false; |
| 831 | |
| 832 | if (other_vr.varying_p ()) |
| 833 | return set_to_bottom (); |
| 834 | |
| 835 | bool res; |
| 836 | if (flag_checking) |
| 837 | { |
| 838 | value_range save (m_vr); |
| 839 | res = m_vr.union_ (r: other_vr); |
| 840 | gcc_assert (res == (m_vr != save)); |
| 841 | } |
| 842 | else |
| 843 | res = m_vr.union_ (r: other_vr); |
| 844 | return res; |
| 845 | } |
| 846 | |
| 847 | /* Return true if value range information in the lattice is yet unknown. */ |
| 848 | |
| 849 | bool |
| 850 | ipcp_vr_lattice::top_p () const |
| 851 | { |
| 852 | return m_vr.undefined_p (); |
| 853 | } |
| 854 | |
| 855 | /* Return true if value range information in the lattice is known to be |
| 856 | unusable. */ |
| 857 | |
| 858 | bool |
| 859 | ipcp_vr_lattice::bottom_p () const |
| 860 | { |
| 861 | return m_vr.varying_p (); |
| 862 | } |
| 863 | |
| 864 | /* Set value range information in the lattice to bottom. Return true if it |
| 865 | previously was in a different state. */ |
| 866 | |
| 867 | bool |
| 868 | ipcp_vr_lattice::set_to_bottom () |
| 869 | { |
| 870 | if (m_vr.varying_p ()) |
| 871 | return false; |
| 872 | |
| 873 | /* Setting an unsupported type here forces the temporary to default |
| 874 | to unsupported_range, which can handle VARYING/DEFINED ranges, |
| 875 | but nothing else (union, intersect, etc). This allows us to set |
| 876 | bottoms on any ranges, and is safe as all users of the lattice |
| 877 | check for bottom first. */ |
| 878 | m_vr.set_type (void_type_node); |
| 879 | m_vr.set_varying (void_type_node); |
| 880 | |
| 881 | return true; |
| 882 | } |
| 883 | |
| 884 | /* Set lattice value to bottom, if it already isn't the case. */ |
| 885 | |
| 886 | bool |
| 887 | ipcp_bits_lattice::set_to_bottom () |
| 888 | { |
| 889 | if (bottom_p ()) |
| 890 | return false; |
| 891 | m_lattice_val = IPA_BITS_VARYING; |
| 892 | m_value = 0; |
| 893 | m_mask = -1; |
| 894 | return true; |
| 895 | } |
| 896 | |
| 897 | /* Set to constant if it isn't already. Only meant to be called |
| 898 | when switching state from TOP. */ |
| 899 | |
| 900 | bool |
| 901 | ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask) |
| 902 | { |
| 903 | gcc_assert (top_p ()); |
| 904 | m_lattice_val = IPA_BITS_CONSTANT; |
| 905 | m_value = wi::bit_and (x: wi::bit_not (x: mask), y: value); |
| 906 | m_mask = mask; |
| 907 | return true; |
| 908 | } |
| 909 | |
| 910 | /* Return true if any of the known bits are non-zero. */ |
| 911 | |
| 912 | bool |
| 913 | ipcp_bits_lattice::known_nonzero_p () const |
| 914 | { |
| 915 | if (!constant_p ()) |
| 916 | return false; |
| 917 | return wi::ne_p (x: wi::bit_and (x: wi::bit_not (x: m_mask), y: m_value), y: 0); |
| 918 | } |
| 919 | |
| 920 | /* Convert operand to value, mask form. */ |
| 921 | |
| 922 | void |
| 923 | ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp) |
| 924 | { |
| 925 | wide_int get_nonzero_bits (const_tree); |
| 926 | |
| 927 | if (TREE_CODE (operand) == INTEGER_CST) |
| 928 | { |
| 929 | *valuep = wi::to_widest (t: operand); |
| 930 | *maskp = 0; |
| 931 | } |
| 932 | else |
| 933 | { |
| 934 | *valuep = 0; |
| 935 | *maskp = -1; |
| 936 | } |
| 937 | } |
| 938 | |
/* Meet operation, similar to ccp_lattice_meet: the values are xored and
wherever this->value and VALUE have different values at the same bit
positions, those bits are dropped to varying. Return true if the mask has
changed.
This function assumes that the lattice value is in CONSTANT state. If
DROP_ALL_ONES, mask out any known bits with value one afterwards. */
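/* For example (ignoring bits above PRECISION, which are always forced to
   unknown), meeting a lattice whose value is 0b0110 and mask is 0b0001,
   i.e. bit 0 unknown, bits 1 and 2 known to be one, bit 3 known to be zero,
   with an operand whose value is 0b0010 and mask is 0b0000 (all bits known)
   yields mask 0b0101 and value 0b0010: bit 2 differs between the two values
   and is dropped to unknown, while bits 1 and 3 stay known.  */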
| 944 | |
| 945 | bool |
| 946 | ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask, |
| 947 | unsigned precision, bool drop_all_ones) |
| 948 | { |
| 949 | gcc_assert (constant_p ()); |
| 950 | |
| 951 | widest_int old_mask = m_mask; |
| 952 | m_mask = (m_mask | mask) | (m_value ^ value); |
| 953 | if (drop_all_ones) |
| 954 | m_mask |= m_value; |
| 955 | |
| 956 | widest_int cap_mask = wi::shifted_mask <widest_int> (start: 0, width: precision, negate_p: true); |
| 957 | m_mask |= cap_mask; |
| 958 | if (wi::sext (x: m_mask, offset: precision) == -1) |
| 959 | return set_to_bottom (); |
| 960 | |
| 961 | m_value &= ~m_mask; |
| 962 | return m_mask != old_mask; |
| 963 | } |
| 964 | |
/* Meet the bits lattice with an operand
described by <value, mask, sgn, precision>. */
| 967 | |
| 968 | bool |
| 969 | ipcp_bits_lattice::meet_with (widest_int value, widest_int mask, |
| 970 | unsigned precision) |
| 971 | { |
| 972 | if (bottom_p ()) |
| 973 | return false; |
| 974 | |
| 975 | if (top_p ()) |
| 976 | { |
| 977 | if (wi::sext (x: mask, offset: precision) == -1) |
| 978 | return set_to_bottom (); |
| 979 | return set_to_constant (value, mask); |
| 980 | } |
| 981 | |
| 982 | return meet_with_1 (value, mask, precision, drop_all_ones: false); |
| 983 | } |
| 984 | |
| 985 | /* Meet bits lattice with the result of bit_value_binop (other, operand) |
| 986 | if code is binary operation or bit_value_unop (other) if code is unary op. |
| 987 | In the case when code is nop_expr, no adjustment is required. If |
| 988 | DROP_ALL_ONES, mask out any known bits with value one afterwards. */ |
| 989 | |
| 990 | bool |
| 991 | ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision, |
| 992 | signop sgn, enum tree_code code, tree operand, |
| 993 | bool drop_all_ones) |
| 994 | { |
| 995 | if (other.bottom_p ()) |
| 996 | return set_to_bottom (); |
| 997 | |
| 998 | if (bottom_p () || other.top_p ()) |
| 999 | return false; |
| 1000 | |
| 1001 | widest_int adjusted_value, adjusted_mask; |
| 1002 | |
| 1003 | if (TREE_CODE_CLASS (code) == tcc_binary) |
| 1004 | { |
| 1005 | tree type = TREE_TYPE (operand); |
| 1006 | widest_int o_value, o_mask; |
| 1007 | get_value_and_mask (operand, valuep: &o_value, maskp: &o_mask); |
| 1008 | |
| 1009 | bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask, |
| 1010 | sgn, precision, other.get_value (), other.get_mask (), |
| 1011 | TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask); |
| 1012 | |
| 1013 | if (wi::sext (x: adjusted_mask, offset: precision) == -1) |
| 1014 | return set_to_bottom (); |
| 1015 | } |
| 1016 | |
| 1017 | else if (TREE_CODE_CLASS (code) == tcc_unary) |
| 1018 | { |
| 1019 | bit_value_unop (code, sgn, precision, &adjusted_value, |
| 1020 | &adjusted_mask, sgn, precision, other.get_value (), |
| 1021 | other.get_mask ()); |
| 1022 | |
| 1023 | if (wi::sext (x: adjusted_mask, offset: precision) == -1) |
| 1024 | return set_to_bottom (); |
| 1025 | } |
| 1026 | |
| 1027 | else |
| 1028 | return set_to_bottom (); |
| 1029 | |
| 1030 | if (top_p ()) |
| 1031 | { |
| 1032 | if (drop_all_ones) |
| 1033 | { |
| 1034 | adjusted_mask |= adjusted_value; |
| 1035 | adjusted_value &= ~adjusted_mask; |
| 1036 | } |
| 1037 | widest_int cap_mask = wi::shifted_mask <widest_int> (start: 0, width: precision, negate_p: true); |
| 1038 | adjusted_mask |= cap_mask; |
| 1039 | if (wi::sext (x: adjusted_mask, offset: precision) == -1) |
| 1040 | return set_to_bottom (); |
| 1041 | return set_to_constant (value: adjusted_value, mask: adjusted_mask); |
| 1042 | } |
| 1043 | else |
| 1044 | return meet_with_1 (value: adjusted_value, mask: adjusted_mask, precision, |
| 1045 | drop_all_ones); |
| 1046 | } |
| 1047 | |
| 1048 | /* Dump the contents of the list to FILE. */ |
| 1049 | |
| 1050 | void |
| 1051 | ipa_argagg_value_list::dump (FILE *f) |
| 1052 | { |
| 1053 | bool comma = false; |
| 1054 | for (const ipa_argagg_value &av : m_elts) |
| 1055 | { |
| 1056 | fprintf (stream: f, format: "%s %i[%u]=" , comma ? "," : "" , |
| 1057 | av.index, av.unit_offset); |
| 1058 | print_generic_expr (f, av.value); |
| 1059 | if (av.by_ref) |
| 1060 | fprintf (stream: f, format: "(by_ref)" ); |
| 1061 | if (av.killed) |
| 1062 | fprintf (stream: f, format: "(killed)" ); |
| 1063 | comma = true; |
| 1064 | } |
| 1065 | fprintf (stream: f, format: "\n" ); |
| 1066 | } |
| 1067 | |
| 1068 | /* Dump the contents of the list to stderr. */ |
| 1069 | |
| 1070 | void |
| 1071 | ipa_argagg_value_list::debug () |
| 1072 | { |
| 1073 | dump (stderr); |
| 1074 | } |
| 1075 | |
| 1076 | /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or |
| 1077 | NULL if there is no such constant. */ |
| 1078 | |
| 1079 | const ipa_argagg_value * |
| 1080 | ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const |
| 1081 | { |
| 1082 | ipa_argagg_value key; |
| 1083 | key.index = index; |
| 1084 | key.unit_offset = unit_offset; |
| 1085 | const ipa_argagg_value *res |
| 1086 | = std::lower_bound (first: m_elts.begin (), last: m_elts.end (), val: key, |
| 1087 | comp: [] (const ipa_argagg_value &elt, |
| 1088 | const ipa_argagg_value &val) |
| 1089 | { |
| 1090 | if (elt.index < val.index) |
| 1091 | return true; |
| 1092 | if (elt.index > val.index) |
| 1093 | return false; |
| 1094 | if (elt.unit_offset < val.unit_offset) |
| 1095 | return true; |
| 1096 | return false; |
| 1097 | }); |
| 1098 | |
| 1099 | if (res == m_elts.end () |
| 1100 | || res->index != index |
| 1101 | || res->unit_offset != unit_offset) |
| 1102 | res = nullptr; |
| 1103 | |
| 1104 | /* TODO: perhaps remove the check (that the underlying array is indeed |
| 1105 | sorted) if it turns out it can be too slow? */ |
| 1106 | if (!flag_checking) |
| 1107 | return res; |
| 1108 | |
| 1109 | const ipa_argagg_value *slow_res = NULL; |
| 1110 | int prev_index = -1; |
| 1111 | unsigned prev_unit_offset = 0; |
| 1112 | for (const ipa_argagg_value &av : m_elts) |
| 1113 | { |
| 1114 | gcc_assert (prev_index < 0 |
| 1115 | || prev_index < av.index |
| 1116 | || prev_unit_offset < av.unit_offset); |
| 1117 | prev_index = av.index; |
| 1118 | prev_unit_offset = av.unit_offset; |
| 1119 | if (av.index == index |
| 1120 | && av.unit_offset == unit_offset) |
| 1121 | slow_res = &av; |
| 1122 | } |
| 1123 | gcc_assert (res == slow_res); |
| 1124 | |
| 1125 | return res; |
| 1126 | } |
| 1127 | |
| 1128 | /* Return the first item describing a constant stored for parameter with INDEX, |
| 1129 | regardless of offset or reference, or NULL if there is no such constant. */ |
| 1130 | |
| 1131 | const ipa_argagg_value * |
| 1132 | ipa_argagg_value_list::get_elt_for_index (int index) const |
| 1133 | { |
| 1134 | const ipa_argagg_value *res |
| 1135 | = std::lower_bound (first: m_elts.begin (), last: m_elts.end (), val: index, |
| 1136 | comp: [] (const ipa_argagg_value &elt, unsigned idx) |
| 1137 | { |
| 1138 | return elt.index < idx; |
| 1139 | }); |
| 1140 | if (res == m_elts.end () |
| 1141 | || res->index != index) |
| 1142 | res = nullptr; |
| 1143 | return res; |
| 1144 | } |
| 1145 | |
| 1146 | /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not |
| 1147 | performing any check of whether value is passed by reference, or NULL_TREE |
| 1148 | if there is no such constant. */ |
| 1149 | |
| 1150 | tree |
| 1151 | ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const |
| 1152 | { |
| 1153 | const ipa_argagg_value *av = get_elt (index, unit_offset); |
| 1154 | return av ? av->value : NULL_TREE; |
| 1155 | } |
| 1156 | |
| 1157 | /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is |
| 1158 | passed by reference or not according to BY_REF, or NULL_TREE if there is |
| 1159 | no such constant. */ |
| 1160 | |
| 1161 | tree |
| 1162 | ipa_argagg_value_list::get_value (int index, unsigned unit_offset, |
| 1163 | bool by_ref) const |
| 1164 | { |
| 1165 | const ipa_argagg_value *av = get_elt (index, unit_offset); |
| 1166 | if (av && av->by_ref == by_ref) |
| 1167 | return av->value; |
| 1168 | return NULL_TREE; |
| 1169 | } |
| 1170 | |
| 1171 | /* Return true if all elements present in OTHER are also present in this |
| 1172 | list. */ |
| 1173 | |
| 1174 | bool |
| 1175 | ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const |
| 1176 | { |
| 1177 | unsigned j = 0; |
| 1178 | for (unsigned i = 0; i < other.m_elts.size (); i++) |
| 1179 | { |
| 1180 | unsigned other_index = other.m_elts[i].index; |
| 1181 | unsigned other_offset = other.m_elts[i].unit_offset; |
| 1182 | |
| 1183 | while (j < m_elts.size () |
| 1184 | && (m_elts[j].index < other_index |
| 1185 | || (m_elts[j].index == other_index |
| 1186 | && m_elts[j].unit_offset < other_offset))) |
| 1187 | j++; |
| 1188 | |
| 1189 | if (j >= m_elts.size () |
| 1190 | || m_elts[j].index != other_index |
| 1191 | || m_elts[j].unit_offset != other_offset |
| 1192 | || m_elts[j].by_ref != other.m_elts[i].by_ref |
| 1193 | || !m_elts[j].value |
| 1194 | || !values_equal_for_ipcp_p (x: m_elts[j].value, y: other.m_elts[i].value)) |
| 1195 | return false; |
| 1196 | } |
| 1197 | return true; |
| 1198 | } |
| 1199 | |
| 1200 | /* Push all items in this list that describe parameter SRC_INDEX into RES as |
ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
offsets, but skip those which would end up with a negative offset. */
| 1203 | |
| 1204 | void |
| 1205 | ipa_argagg_value_list::push_adjusted_values (unsigned src_index, |
| 1206 | unsigned dest_index, |
| 1207 | unsigned unit_delta, |
| 1208 | vec<ipa_argagg_value> *res) const |
| 1209 | { |
| 1210 | const ipa_argagg_value *av = get_elt_for_index (index: src_index); |
| 1211 | if (!av) |
| 1212 | return; |
| 1213 | unsigned prev_unit_offset = 0; |
| 1214 | bool first = true; |
| 1215 | for (; av < m_elts.end (); ++av) |
| 1216 | { |
| 1217 | if (av->index > src_index) |
| 1218 | return; |
| 1219 | if (av->index == src_index |
| 1220 | && (av->unit_offset >= unit_delta) |
| 1221 | && av->value) |
| 1222 | { |
| 1223 | ipa_argagg_value new_av; |
| 1224 | gcc_checking_assert (av->value); |
| 1225 | new_av.value = av->value; |
| 1226 | new_av.unit_offset = av->unit_offset - unit_delta; |
| 1227 | new_av.index = dest_index; |
| 1228 | new_av.by_ref = av->by_ref; |
| 1229 | gcc_assert (!av->killed); |
| 1230 | new_av.killed = false; |
| 1231 | |
| 1232 | /* Quick check that the offsets we push are indeed increasing. */ |
| 1233 | gcc_assert (first |
| 1234 | || new_av.unit_offset > prev_unit_offset); |
| 1235 | prev_unit_offset = new_av.unit_offset; |
| 1236 | first = false; |
| 1237 | |
| 1238 | res->safe_push (obj: new_av); |
| 1239 | } |
| 1240 | } |
| 1241 | } |
| 1242 | |
| 1243 | /* Push to RES information about single lattices describing aggregate values in |
| 1244 | PLATS as those describing parameter DEST_INDEX and the original offset minus |
| 1245 | UNIT_DELTA. Return true if any item has been pushed to RES. */ |
| 1246 | |
| 1247 | static bool |
| 1248 | push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index, |
| 1249 | unsigned unit_delta, |
| 1250 | vec<ipa_argagg_value> *res) |
| 1251 | { |
| 1252 | if (plats->aggs_contain_variable) |
| 1253 | return false; |
| 1254 | |
| 1255 | bool pushed_sth = false; |
| 1256 | bool first = true; |
| 1257 | unsigned prev_unit_offset = 0; |
| 1258 | for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) |
| 1259 | if (aglat->is_single_const () |
| 1260 | && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0) |
| 1261 | { |
| 1262 | ipa_argagg_value iav; |
| 1263 | iav.value = aglat->values->value; |
| 1264 | iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta; |
| 1265 | iav.index = dest_index; |
| 1266 | iav.by_ref = plats->aggs_by_ref; |
| 1267 | iav.killed = false; |
| 1268 | |
| 1269 | gcc_assert (first |
| 1270 | || iav.unit_offset > prev_unit_offset); |
| 1271 | prev_unit_offset = iav.unit_offset; |
| 1272 | first = false; |
| 1273 | |
| 1274 | pushed_sth = true; |
| 1275 | res->safe_push (obj: iav); |
| 1276 | } |
| 1277 | return pushed_sth; |
| 1278 | } |
| 1279 | |
| 1280 | /* Turn all values in LIST that are not present in OTHER into NULL_TREEs. |
| 1281 | Return the number of remaining valid entries. */ |
| 1282 | |
| 1283 | static unsigned |
| 1284 | intersect_argaggs_with (vec<ipa_argagg_value> &elts, |
| 1285 | const vec<ipa_argagg_value> &other) |
| 1286 | { |
| 1287 | unsigned valid_entries = 0; |
| 1288 | unsigned j = 0; |
| 1289 | for (unsigned i = 0; i < elts.length (); i++) |
| 1290 | { |
| 1291 | if (!elts[i].value) |
| 1292 | continue; |
| 1293 | |
| 1294 | unsigned this_index = elts[i].index; |
| 1295 | unsigned this_offset = elts[i].unit_offset; |
| 1296 | |
| 1297 | while (j < other.length () |
| 1298 | && (other[j].index < this_index |
| 1299 | || (other[j].index == this_index |
| 1300 | && other[j].unit_offset < this_offset))) |
| 1301 | j++; |
| 1302 | |
| 1303 | if (j >= other.length ()) |
| 1304 | { |
| 1305 | elts[i].value = NULL_TREE; |
| 1306 | continue; |
| 1307 | } |
| 1308 | |
| 1309 | if (other[j].index == this_index |
| 1310 | && other[j].unit_offset == this_offset |
| 1311 | && other[j].by_ref == elts[i].by_ref |
| 1312 | && other[j].value |
| 1313 | && values_equal_for_ipcp_p (x: other[j].value, y: elts[i].value)) |
| 1314 | valid_entries++; |
| 1315 | else |
| 1316 | elts[i].value = NULL_TREE; |
| 1317 | } |
| 1318 | return valid_entries; |
| 1319 | } |
| 1320 | |
/* Mark both aggregate and scalar lattices as containing an unknown variable,
return true if any of them has not been marked as such so far. */
| 1323 | |
| 1324 | static inline bool |
| 1325 | set_all_contains_variable (class ipcp_param_lattices *plats) |
| 1326 | { |
| 1327 | bool ret; |
| 1328 | ret = plats->itself.set_contains_variable (); |
| 1329 | ret |= plats->ctxlat.set_contains_variable (); |
| 1330 | ret |= set_agg_lats_contain_variable (plats); |
| 1331 | ret |= plats->bits_lattice.set_to_bottom (); |
| 1332 | ret |= plats->m_value_range.set_to_bottom (); |
| 1333 | return ret; |
| 1334 | } |
| 1335 | |
| 1336 | /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA |
| 1337 | points to by the number of callers to NODE. */ |
| 1338 | |
| 1339 | static bool |
| 1340 | count_callers (cgraph_node *node, void *data) |
| 1341 | { |
| 1342 | int *caller_count = (int *) data; |
| 1343 | |
| 1344 | for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| 1345 | /* Local thunks can be handled transparently, but if the thunk cannot |
| 1346 | be optimized out, count it as a real use. */ |
| 1347 | if (!cs->caller->thunk || !cs->caller->local) |
| 1348 | ++*caller_count; |
| 1349 | return false; |
| 1350 | } |
| 1351 | |
/* Worker of call_for_symbol_thunks_and_aliases, supposed to be called on a
node which is known to have just one caller. Set that caller's
node_calling_single_call flag. */
| 1354 | |
| 1355 | static bool |
| 1356 | set_single_call_flag (cgraph_node *node, void *) |
| 1357 | { |
| 1358 | cgraph_edge *cs = node->callers; |
| 1359 | /* Local thunks can be handled transparently, skip them. */ |
| 1360 | while (cs && cs->caller->thunk && cs->caller->local) |
| 1361 | cs = cs->next_caller; |
| 1362 | if (cs) |
| 1363 | if (ipa_node_params* info = ipa_node_params_sum->get (node: cs->caller)) |
| 1364 | { |
| 1365 | info->node_calling_single_call = true; |
| 1366 | return true; |
| 1367 | } |
| 1368 | return false; |
| 1369 | } |
| 1370 | |
| 1371 | /* Initialize ipcp_lattices. */ |
| 1372 | |
| 1373 | static void |
| 1374 | initialize_node_lattices (struct cgraph_node *node) |
| 1375 | { |
| 1376 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 1377 | struct cgraph_edge *ie; |
| 1378 | bool disable = false, variable = false; |
| 1379 | int i; |
| 1380 | |
| 1381 | gcc_checking_assert (node->has_gimple_body_p ()); |
| 1382 | |
| 1383 | if (!ipa_get_param_count (info)) |
| 1384 | disable = true; |
| 1385 | else if (node->local) |
| 1386 | { |
| 1387 | int caller_count = 0; |
| 1388 | node->call_for_symbol_thunks_and_aliases (callback: count_callers, data: &caller_count, |
| 1389 | include_overwritable: true); |
| 1390 | gcc_checking_assert (caller_count > 0); |
| 1391 | if (caller_count == 1) |
| 1392 | node->call_for_symbol_thunks_and_aliases (callback: set_single_call_flag, |
| 1393 | NULL, include_overwritable: true); |
| 1394 | } |
| 1395 | else |
| 1396 | { |
| 1397 | /* When cloning is allowed, we can assume that externally visible |
| 1398 | functions are not called. We will compensate this by cloning |
| 1399 | later. */ |
| 1400 | if (ipcp_versionable_function_p (node) |
| 1401 | && ipcp_cloning_candidate_p (node)) |
| 1402 | variable = true; |
| 1403 | else |
| 1404 | disable = true; |
| 1405 | } |
| 1406 | |
| 1407 | if (dump_file && (dump_flags & TDF_DETAILS) |
| 1408 | && !node->alias && !node->thunk) |
| 1409 | { |
| 1410 | fprintf (stream: dump_file, format: "Initializing lattices of %s\n" , |
| 1411 | node->dump_name ()); |
| 1412 | if (disable || variable) |
| 1413 | fprintf (stream: dump_file, format: " Marking all lattices as %s\n" , |
| 1414 | disable ? "BOTTOM" : "VARIABLE" ); |
| 1415 | } |
| 1416 | |
| 1417 | auto_vec<bool, 16> surviving_params; |
| 1418 | bool pre_modified = false; |
| 1419 | |
| 1420 | clone_info *cinfo = clone_info::get (node); |
| 1421 | |
| 1422 | if (!disable && cinfo && cinfo->param_adjustments) |
| 1423 | { |
| 1424 | /* At the moment all IPA optimizations should use the number of |
| 1425 | parameters of the prevailing decl as the m_always_copy_start. |
| 1426 | Handling any other value would complicate the code below, so for the |
time being let's only assert it is so. */
| 1428 | gcc_assert ((cinfo->param_adjustments->m_always_copy_start |
| 1429 | == ipa_get_param_count (info)) |
| 1430 | || cinfo->param_adjustments->m_always_copy_start < 0); |
| 1431 | |
| 1432 | pre_modified = true; |
| 1433 | cinfo->param_adjustments->get_surviving_params (surviving_params: &surviving_params); |
| 1434 | |
| 1435 | if (dump_file && (dump_flags & TDF_DETAILS) |
| 1436 | && !node->alias && !node->thunk) |
| 1437 | { |
| 1438 | bool first = true; |
| 1439 | for (int j = 0; j < ipa_get_param_count (info); j++) |
| 1440 | { |
| 1441 | if (j < (int) surviving_params.length () |
| 1442 | && surviving_params[j]) |
| 1443 | continue; |
| 1444 | if (first) |
| 1445 | { |
| 1446 | fprintf (stream: dump_file, |
| 1447 | format: " The following parameters are dead on arrival:" ); |
| 1448 | first = false; |
| 1449 | } |
| 1450 | fprintf (stream: dump_file, format: " %u" , j); |
| 1451 | } |
| 1452 | if (!first) |
| 1453 | fprintf (stream: dump_file, format: "\n" ); |
| 1454 | } |
| 1455 | } |
| 1456 | |
| 1457 | for (i = 0; i < ipa_get_param_count (info); i++) |
| 1458 | { |
| 1459 | ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 1460 | tree type = ipa_get_type (info, i); |
| 1461 | if (disable |
| 1462 | || !ipa_get_type (info, i) |
| 1463 | || (pre_modified && (surviving_params.length () <= (unsigned) i |
| 1464 | || !surviving_params[i]))) |
| 1465 | { |
| 1466 | plats->itself.set_to_bottom (); |
| 1467 | plats->ctxlat.set_to_bottom (); |
| 1468 | set_agg_lats_to_bottom (plats); |
| 1469 | plats->bits_lattice.set_to_bottom (); |
| 1470 | plats->m_value_range.init (type); |
| 1471 | plats->m_value_range.set_to_bottom (); |
| 1472 | } |
| 1473 | else |
| 1474 | { |
| 1475 | plats->m_value_range.init (type); |
| 1476 | if (variable) |
| 1477 | set_all_contains_variable (plats); |
| 1478 | } |
| 1479 | } |
| 1480 | |
| 1481 | for (ie = node->indirect_calls; ie; ie = ie->next_callee) |
| 1482 | if (ie->indirect_info->polymorphic |
| 1483 | && ie->indirect_info->param_index >= 0) |
| 1484 | { |
| 1485 | gcc_checking_assert (ie->indirect_info->param_index >= 0); |
| 1486 | ipa_get_parm_lattices (info, |
| 1487 | i: ie->indirect_info->param_index)->virt_call = 1; |
| 1488 | } |
| 1489 | } |
| 1490 | |
| 1491 | /* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP |
| 1492 | propagated to a parameter of type PARAM_TYPE, or return a fold-converted |
| 1493 | VALUE to PARAM_TYPE if that is possible. Return NULL_TREE otherwise. */ |
| 1494 | |
| 1495 | static tree |
| 1496 | ipacp_value_safe_for_type (tree param_type, tree value) |
| 1497 | { |
| 1498 | if (!value) |
| 1499 | return NULL_TREE; |
| 1500 | tree val_type = TREE_TYPE (value); |
| 1501 | if (param_type == val_type |
| 1502 | || useless_type_conversion_p (param_type, val_type)) |
| 1503 | return value; |
| 1504 | if (fold_convertible_p (param_type, value)) |
| 1505 | return fold_convert (param_type, value); |
| 1506 | else |
| 1507 | return NULL_TREE; |
| 1508 | } |
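
/* A hypothetical illustration of the above (not taken from the sources): if
the known constant is the int 7 and the parameter is declared long, the
types differ but fold_convert can produce the equivalent long constant,
which is what gets propagated; when no such conversion is possible,
returning NULL_TREE makes callers treat the value as unusable. */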
| 1509 | |
| 1510 | /* Return the result of a (possibly arithmetic) operation determined by OPCODE |
on the constant value INPUT. OPERAND is the second operand, required only for
binary operations. RES_TYPE, required when OPCODE is not NOP_EXPR, is the type
in which the operation is to be performed. Return NULL_TREE if the result
cannot be determined or cannot be considered an interprocedural
invariant. */
| 1516 | |
| 1517 | static tree |
| 1518 | ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand, |
| 1519 | tree res_type) |
| 1520 | { |
| 1521 | tree res; |
| 1522 | |
| 1523 | if (opcode == NOP_EXPR) |
| 1524 | return input; |
| 1525 | if (!is_gimple_ip_invariant (input)) |
| 1526 | return NULL_TREE; |
| 1527 | |
| 1528 | if (opcode == ASSERT_EXPR) |
| 1529 | { |
| 1530 | if (values_equal_for_ipcp_p (x: input, y: operand)) |
| 1531 | return input; |
| 1532 | else |
| 1533 | return NULL_TREE; |
| 1534 | } |
| 1535 | |
| 1536 | if (TREE_CODE_CLASS (opcode) == tcc_unary) |
| 1537 | res = fold_unary (opcode, res_type, input); |
| 1538 | else |
| 1539 | res = fold_binary (opcode, res_type, input, operand); |
| 1540 | |
| 1541 | if (res && !is_gimple_ip_invariant (res)) |
| 1542 | return NULL_TREE; |
| 1543 | |
| 1544 | return res; |
| 1545 | } |
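
/* A sketch of how this is typically exercised (hypothetical code, not from
the sources): for

void caller (int x)
{
callee (x + 4);
}

the analysis stage records a pass-through jump function with opcode
PLUS_EXPR and constant operand 4 for the argument. If propagation later
determines that X is always 3, this function folds PLUS_EXPR on 3 and 4 in
the recorded operation type and returns the interprocedural invariant 7,
which can then be propagated into CALLEE. */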
| 1546 | |
| 1547 | /* Return the result of an ancestor jump function JFUNC on the constant value |
| 1548 | INPUT. Return NULL_TREE if that cannot be determined. */ |
| 1549 | |
| 1550 | static tree |
| 1551 | ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input) |
| 1552 | { |
| 1553 | gcc_checking_assert (TREE_CODE (input) != TREE_BINFO); |
| 1554 | if (TREE_CODE (input) == ADDR_EXPR) |
| 1555 | { |
| 1556 | gcc_checking_assert (is_gimple_ip_invariant_address (input)); |
| 1557 | poly_int64 off = ipa_get_jf_ancestor_offset (jfunc); |
| 1558 | if (known_eq (off, 0)) |
| 1559 | return input; |
| 1560 | poly_int64 byte_offset = exact_div (a: off, BITS_PER_UNIT); |
| 1561 | return build1 (ADDR_EXPR, TREE_TYPE (input), |
| 1562 | fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input, |
| 1563 | build_int_cst (ptr_type_node, byte_offset))); |
| 1564 | } |
| 1565 | else if (ipa_get_jf_ancestor_keep_null (jfunc) |
| 1566 | && zerop (input)) |
| 1567 | return input; |
| 1568 | else |
| 1569 | return NULL_TREE; |
| 1570 | } |
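
/* As an illustration (hypothetical types, not from the sources): for

struct S { int a; struct T t; };
void caller (struct S *p)
{
callee (&p->t);
}

the argument is described by an ancestor jump function whose offset is the
bit offset of field T within S. If P is known to be the address &global_s
of some global variable at all call sites of CALLER, the code above rebuilds
the equivalent &MEM_REF [&global_s, offset / BITS_PER_UNIT] address, so an
address invariant can still be propagated into CALLEE. */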
| 1571 | |
| 1572 | /* Determine whether JFUNC evaluates to a single known constant value and if |
| 1573 | so, return it. Otherwise return NULL. INFO describes the caller node or |
| 1574 | the one it is inlined to, so that pass-through jump functions can be |
| 1575 | evaluated. PARM_TYPE is the type of the parameter to which the result is |
| 1576 | passed. */ |
| 1577 | |
| 1578 | tree |
| 1579 | ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc, |
| 1580 | tree parm_type) |
| 1581 | { |
| 1582 | if (jfunc->type == IPA_JF_CONST) |
| 1583 | return ipa_get_jf_constant (jfunc); |
| 1584 | else if (jfunc->type == IPA_JF_PASS_THROUGH |
| 1585 | || jfunc->type == IPA_JF_ANCESTOR) |
| 1586 | { |
| 1587 | tree input; |
| 1588 | int idx; |
| 1589 | |
| 1590 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 1591 | idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 1592 | else |
| 1593 | idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 1594 | |
| 1595 | if (info->ipcp_orig_node) |
| 1596 | input = info->known_csts[idx]; |
| 1597 | else |
| 1598 | { |
| 1599 | ipcp_lattice<tree> *lat; |
| 1600 | |
| 1601 | if (info->lattices.is_empty () |
| 1602 | || idx >= ipa_get_param_count (info)) |
| 1603 | return NULL_TREE; |
| 1604 | lat = ipa_get_scalar_lat (info, i: idx); |
| 1605 | if (!lat->is_single_const ()) |
| 1606 | return NULL_TREE; |
| 1607 | input = lat->values->value; |
| 1608 | } |
| 1609 | |
| 1610 | if (!input) |
| 1611 | return NULL_TREE; |
| 1612 | |
| 1613 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 1614 | { |
| 1615 | if (!parm_type) |
| 1616 | return NULL_TREE; |
| 1617 | enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); |
| 1618 | tree op2 = ipa_get_jf_pass_through_operand (jfunc); |
| 1619 | tree op_type |
| 1620 | = (opcode == NOP_EXPR) ? NULL_TREE |
| 1621 | : ipa_get_jf_pass_through_op_type (jfunc); |
tree cstval = ipa_get_jf_arith_result (opcode, input, op2, op_type);
return ipacp_value_safe_for_type (parm_type, cstval);
| 1624 | } |
| 1625 | else |
| 1626 | return ipa_get_jf_ancestor_result (jfunc, input); |
| 1627 | } |
| 1628 | else |
| 1629 | return NULL_TREE; |
| 1630 | } |
| 1631 | |
/* Determine whether JFUNC evaluates to a single known polymorphic context,
given that INFO describes the caller node or the one it is inlined to, CS is
the call graph edge corresponding to JFUNC and CSIDX is the index of the
described parameter. */
| 1636 | |
| 1637 | ipa_polymorphic_call_context |
| 1638 | ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx, |
| 1639 | ipa_jump_func *jfunc) |
| 1640 | { |
| 1641 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 1642 | ipa_polymorphic_call_context ctx; |
| 1643 | ipa_polymorphic_call_context *edge_ctx |
| 1644 | = cs ? ipa_get_ith_polymorhic_call_context (args, i: csidx) : NULL; |
| 1645 | |
| 1646 | if (edge_ctx && !edge_ctx->useless_p ()) |
| 1647 | ctx = *edge_ctx; |
| 1648 | |
| 1649 | if (jfunc->type == IPA_JF_PASS_THROUGH |
| 1650 | || jfunc->type == IPA_JF_ANCESTOR) |
| 1651 | { |
| 1652 | ipa_polymorphic_call_context srcctx; |
| 1653 | int srcidx; |
| 1654 | bool type_preserved = true; |
| 1655 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 1656 | { |
| 1657 | if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) |
| 1658 | return ctx; |
| 1659 | type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc); |
| 1660 | srcidx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 1661 | } |
| 1662 | else |
| 1663 | { |
| 1664 | type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc); |
| 1665 | srcidx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 1666 | } |
| 1667 | if (info->ipcp_orig_node) |
| 1668 | { |
| 1669 | if (info->known_contexts.exists ()) |
| 1670 | srcctx = info->known_contexts[srcidx]; |
| 1671 | } |
| 1672 | else |
| 1673 | { |
| 1674 | if (info->lattices.is_empty () |
| 1675 | || srcidx >= ipa_get_param_count (info)) |
| 1676 | return ctx; |
| 1677 | ipcp_lattice<ipa_polymorphic_call_context> *lat; |
| 1678 | lat = ipa_get_poly_ctx_lat (info, i: srcidx); |
| 1679 | if (!lat->is_single_const ()) |
| 1680 | return ctx; |
| 1681 | srcctx = lat->values->value; |
| 1682 | } |
| 1683 | if (srcctx.useless_p ()) |
| 1684 | return ctx; |
| 1685 | if (jfunc->type == IPA_JF_ANCESTOR) |
| 1686 | srcctx.offset_by (off: ipa_get_jf_ancestor_offset (jfunc)); |
| 1687 | if (!type_preserved) |
| 1688 | srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor); |
| 1689 | srcctx.combine_with (ctx); |
| 1690 | return srcctx; |
| 1691 | } |
| 1692 | |
| 1693 | return ctx; |
| 1694 | } |
| 1695 | |
| 1696 | /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to |
| 1697 | DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if |
| 1698 | the result is a range that is not VARYING nor UNDEFINED. */ |
| 1699 | |
| 1700 | bool |
| 1701 | ipa_vr_operation_and_type_effects (vrange &dst_vr, |
| 1702 | const vrange &src_vr, |
| 1703 | enum tree_code operation, |
| 1704 | tree dst_type, tree src_type) |
| 1705 | { |
| 1706 | if (!ipa_vr_supported_type_p (type: dst_type) |
| 1707 | || !ipa_vr_supported_type_p (type: src_type)) |
| 1708 | return false; |
| 1709 | |
| 1710 | range_op_handler handler (operation); |
| 1711 | if (!handler) |
| 1712 | return false; |
| 1713 | |
| 1714 | value_range varying (dst_type); |
| 1715 | varying.set_varying (dst_type); |
| 1716 | |
| 1717 | return (handler.operand_check_p (dst_type, src_type, dst_type) |
| 1718 | && handler.fold_range (r&: dst_vr, type: dst_type, lh: src_vr, rh: varying) |
| 1719 | && !dst_vr.varying_p () |
| 1720 | && !dst_vr.undefined_p ()); |
| 1721 | } |
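
/* A small example of the intent (assumed operation and ranges): with
OPERATION being ABS_EXPR, a SRC_VR of [-7, 5] in int and DST_TYPE also int,
the range operator folds the result to [0, 7] and the function returns true;
if the folded range came out VARYING or UNDEFINED, false would be returned
so that callers do not record useless information. */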
| 1722 | |
| 1723 | /* Same as above, but the SRC_VR argument is an IPA_VR which must |
first be extracted into a vrange. */
| 1725 | |
| 1726 | bool |
| 1727 | ipa_vr_operation_and_type_effects (vrange &dst_vr, |
| 1728 | const ipa_vr &src_vr, |
| 1729 | enum tree_code operation, |
| 1730 | tree dst_type, tree src_type) |
| 1731 | { |
| 1732 | value_range tmp; |
| 1733 | src_vr.get_vrange (tmp); |
| 1734 | return ipa_vr_operation_and_type_effects (dst_vr, src_vr: tmp, operation, |
| 1735 | dst_type, src_type); |
| 1736 | } |
| 1737 | |
/* Given a PASS_THROUGH jump function JFUNC whose source is SRC_VR of SRC_TYPE
and whose result needs to be of DST_TYPE, intersect VR with whatever value
range information can be deduced from the jump function. CONTEXT_NODE is the
call graph node representing the function for which optimization flags should
be evaluated. */
| 1743 | |
| 1744 | static void |
| 1745 | ipa_vr_intersect_with_arith_jfunc (vrange &vr, |
| 1746 | ipa_jump_func *jfunc, |
| 1747 | cgraph_node *context_node, |
| 1748 | const value_range &src_vr, |
| 1749 | tree src_type, |
| 1750 | tree dst_type) |
| 1751 | { |
| 1752 | if (src_vr.undefined_p () || src_vr.varying_p ()) |
| 1753 | return; |
| 1754 | |
| 1755 | enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc); |
| 1756 | if (TREE_CODE_CLASS (operation) == tcc_unary) |
| 1757 | { |
| 1758 | value_range op_res; |
| 1759 | const value_range *inter_vr; |
| 1760 | if (operation != NOP_EXPR) |
| 1761 | { |
| 1762 | tree operation_type = ipa_get_jf_pass_through_op_type (jfunc); |
| 1763 | op_res.set_varying (operation_type); |
if (!ipa_vr_operation_and_type_effects (op_res, src_vr, operation,
operation_type, src_type))
| 1766 | return; |
| 1767 | if (src_type == dst_type) |
| 1768 | { |
| 1769 | vr.intersect (op_res); |
| 1770 | return; |
| 1771 | } |
| 1772 | inter_vr = &op_res; |
| 1773 | src_type = operation_type; |
| 1774 | } |
| 1775 | else |
| 1776 | inter_vr = &src_vr; |
| 1777 | |
| 1778 | value_range tmp_res (dst_type); |
if (ipa_vr_operation_and_type_effects (tmp_res, *inter_vr, NOP_EXPR,
| 1780 | dst_type, src_type)) |
| 1781 | vr.intersect (tmp_res); |
| 1782 | return; |
| 1783 | } |
| 1784 | |
| 1785 | tree operand = ipa_get_jf_pass_through_operand (jfunc); |
| 1786 | range_op_handler handler (operation); |
| 1787 | if (!handler) |
| 1788 | return; |
| 1789 | value_range op_vr (TREE_TYPE (operand)); |
| 1790 | ipa_get_range_from_ip_invariant (r&: op_vr, val: operand, node: context_node); |
| 1791 | |
| 1792 | tree operation_type = ipa_get_jf_pass_through_op_type (jfunc); |
| 1793 | value_range op_res (operation_type); |
if (!ipa_vr_supported_type_p (operation_type)
|| !handler.operand_check_p (operation_type, src_type, op_vr.type ())
|| !handler.fold_range (op_res, operation_type, src_vr, op_vr))
| 1797 | return; |
| 1798 | |
| 1799 | value_range tmp_res (dst_type); |
if (ipa_vr_operation_and_type_effects (tmp_res, op_res, NOP_EXPR, dst_type,
operation_type))
| 1802 | vr.intersect (tmp_res); |
| 1803 | } |
| 1804 | |
| 1805 | /* Determine range of JFUNC given that INFO describes the caller node or |
| 1806 | the one it is inlined to, CS is the call graph edge corresponding to JFUNC |
and PARM_TYPE is the type of the parameter. */
| 1808 | |
| 1809 | void |
| 1810 | ipa_value_range_from_jfunc (vrange &vr, |
| 1811 | ipa_node_params *info, cgraph_edge *cs, |
| 1812 | ipa_jump_func *jfunc, tree parm_type) |
| 1813 | { |
| 1814 | vr.set_varying (parm_type); |
| 1815 | |
| 1816 | if (jfunc->m_vr && jfunc->m_vr->known_p ()) |
ipa_vr_operation_and_type_effects (vr,
*jfunc->m_vr,
NOP_EXPR, parm_type,
jfunc->m_vr->type ());
| 1821 | if (vr.singleton_p ()) |
| 1822 | return; |
| 1823 | |
| 1824 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 1825 | { |
| 1826 | ipcp_transformation *sum |
| 1827 | = ipcp_get_transformation_summary (node: cs->caller->inlined_to |
| 1828 | ? cs->caller->inlined_to |
| 1829 | : cs->caller); |
| 1830 | if (!sum || !sum->m_vr) |
| 1831 | return; |
| 1832 | |
| 1833 | int idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 1834 | |
| 1835 | if (!(*sum->m_vr)[idx].known_p ()) |
| 1836 | return; |
| 1837 | tree src_type = ipa_get_type (info, i: idx); |
| 1838 | value_range srcvr; |
| 1839 | (*sum->m_vr)[idx].get_vrange (srcvr); |
| 1840 | |
ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller, srcvr, src_type,
parm_type);
| 1843 | } |
| 1844 | } |
| 1845 | |
| 1846 | /* Determine whether ITEM, jump function for an aggregate part, evaluates to a |
| 1847 | single known constant value and if so, return it. Otherwise return NULL. |
NODE and INFO describe the caller node or the one it is inlined to, and
| 1849 | its related info. */ |
| 1850 | |
| 1851 | tree |
| 1852 | ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node, |
| 1853 | const ipa_agg_jf_item *item) |
| 1854 | { |
| 1855 | tree value = NULL_TREE; |
| 1856 | int src_idx; |
| 1857 | |
| 1858 | if (item->offset < 0 |
| 1859 | || item->jftype == IPA_JF_UNKNOWN |
| 1860 | || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT) |
| 1861 | return NULL_TREE; |
| 1862 | |
| 1863 | if (item->jftype == IPA_JF_CONST) |
| 1864 | return item->value.constant; |
| 1865 | |
| 1866 | gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH |
| 1867 | || item->jftype == IPA_JF_LOAD_AGG); |
| 1868 | |
| 1869 | src_idx = item->value.pass_through.formal_id; |
| 1870 | |
| 1871 | if (info->ipcp_orig_node) |
| 1872 | { |
| 1873 | if (item->jftype == IPA_JF_PASS_THROUGH) |
| 1874 | value = info->known_csts[src_idx]; |
| 1875 | else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node)) |
| 1876 | { |
| 1877 | ipa_argagg_value_list avl (ts); |
value = avl.get_value (src_idx,
item->value.load_agg.offset / BITS_PER_UNIT,
item->value.load_agg.by_ref);
| 1881 | } |
| 1882 | } |
| 1883 | else if (!info->lattices.is_empty ()) |
| 1884 | { |
| 1885 | class ipcp_param_lattices *src_plats |
| 1886 | = ipa_get_parm_lattices (info, i: src_idx); |
| 1887 | |
| 1888 | if (item->jftype == IPA_JF_PASS_THROUGH) |
| 1889 | { |
| 1890 | struct ipcp_lattice<tree> *lat = &src_plats->itself; |
| 1891 | |
| 1892 | if (!lat->is_single_const ()) |
| 1893 | return NULL_TREE; |
| 1894 | |
| 1895 | value = lat->values->value; |
| 1896 | } |
| 1897 | else if (src_plats->aggs |
| 1898 | && !src_plats->aggs_bottom |
| 1899 | && !src_plats->aggs_contain_variable |
| 1900 | && src_plats->aggs_by_ref == item->value.load_agg.by_ref) |
| 1901 | { |
| 1902 | struct ipcp_agg_lattice *aglat; |
| 1903 | |
| 1904 | for (aglat = src_plats->aggs; aglat; aglat = aglat->next) |
| 1905 | { |
| 1906 | if (aglat->offset > item->value.load_agg.offset) |
| 1907 | break; |
| 1908 | |
| 1909 | if (aglat->offset == item->value.load_agg.offset) |
| 1910 | { |
| 1911 | if (aglat->is_single_const ()) |
| 1912 | value = aglat->values->value; |
| 1913 | break; |
| 1914 | } |
| 1915 | } |
| 1916 | } |
| 1917 | } |
| 1918 | |
| 1919 | if (!value) |
| 1920 | return NULL_TREE; |
| 1921 | |
| 1922 | if (item->jftype == IPA_JF_LOAD_AGG) |
| 1923 | { |
| 1924 | tree load_type = item->value.load_agg.type; |
| 1925 | tree value_type = TREE_TYPE (value); |
| 1926 | |
| 1927 | /* Ensure value type is compatible with load type. */ |
| 1928 | if (!useless_type_conversion_p (load_type, value_type)) |
| 1929 | return NULL_TREE; |
| 1930 | } |
| 1931 | |
tree cstval = ipa_get_jf_arith_result (item->value.pass_through.operation,
value,
item->value.pass_through.operand,
item->value.pass_through.op_type);
return ipacp_value_safe_for_type (item->type, cstval);
| 1937 | } |
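
/* To make the IPA_JF_LOAD_AGG case above concrete (hypothetical snippet,
not from the sources): for

struct S { int a; int b; };
void caller (struct S *p)
{
callee (p->a);
}

the scalar argument is described by a load_agg jump function that reads an
int at the offset of field A from the aggregate pointed to by parameter P.
If the corresponding aggregate lattice of P holds a single constant at that
offset, VALUE picks it up and, after the optional arithmetic step, it is
checked against the type of the destination parameter. */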
| 1938 | |
| 1939 | /* Process all items in AGG_JFUNC relative to caller (or the node the original |
caller is inlined to) NODE, which is described by INFO, and push the results to
| 1941 | RES as describing values passed in parameter DST_INDEX. */ |
| 1942 | |
| 1943 | void |
| 1944 | ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node, |
| 1945 | ipa_agg_jump_function *agg_jfunc, |
| 1946 | unsigned dst_index, |
| 1947 | vec<ipa_argagg_value> *res) |
| 1948 | { |
| 1949 | unsigned prev_unit_offset = 0; |
| 1950 | bool first = true; |
| 1951 | |
| 1952 | for (const ipa_agg_jf_item &item : agg_jfunc->items) |
| 1953 | { |
| 1954 | tree value = ipa_agg_value_from_jfunc (info, node, item: &item); |
| 1955 | if (!value) |
| 1956 | continue; |
| 1957 | |
| 1958 | ipa_argagg_value iav; |
| 1959 | iav.value = value; |
| 1960 | iav.unit_offset = item.offset / BITS_PER_UNIT; |
| 1961 | iav.index = dst_index; |
| 1962 | iav.by_ref = agg_jfunc->by_ref; |
| 1963 | iav.killed = 0; |
| 1964 | |
| 1965 | gcc_assert (first |
| 1966 | || iav.unit_offset > prev_unit_offset); |
| 1967 | prev_unit_offset = iav.unit_offset; |
| 1968 | first = false; |
| 1969 | |
| 1970 | res->safe_push (obj: iav); |
| 1971 | } |
| 1972 | } |
| 1973 | |
| 1974 | /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not |
| 1975 | bottom, not containing a variable component and without any known value at |
| 1976 | the same time. */ |
| 1977 | |
| 1978 | DEBUG_FUNCTION void |
| 1979 | ipcp_verify_propagated_values (void) |
| 1980 | { |
| 1981 | struct cgraph_node *node; |
| 1982 | |
| 1983 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| 1984 | { |
| 1985 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 1986 | if (!opt_for_fn (node->decl, flag_ipa_cp) |
| 1987 | || !opt_for_fn (node->decl, optimize)) |
| 1988 | continue; |
| 1989 | int i, count = ipa_get_param_count (info); |
| 1990 | |
| 1991 | for (i = 0; i < count; i++) |
| 1992 | { |
| 1993 | ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i); |
| 1994 | |
| 1995 | if (!lat->bottom |
| 1996 | && !lat->contains_variable |
| 1997 | && lat->values_count == 0) |
| 1998 | { |
| 1999 | if (dump_file) |
| 2000 | { |
symtab->dump (dump_file);
fprintf (dump_file, "\nIPA lattices after constant "
"propagation, before gcc_unreachable:\n");
print_all_lattices (dump_file, true, false);
| 2005 | } |
| 2006 | |
| 2007 | gcc_unreachable (); |
| 2008 | } |
| 2009 | } |
| 2010 | } |
| 2011 | } |
| 2012 | |
| 2013 | /* Return true iff X and Y should be considered equal contexts by IPA-CP. */ |
| 2014 | |
| 2015 | static bool |
| 2016 | values_equal_for_ipcp_p (ipa_polymorphic_call_context x, |
| 2017 | ipa_polymorphic_call_context y) |
| 2018 | { |
return x.equal_to (y);
| 2020 | } |
| 2021 | |
| 2022 | |
| 2023 | /* Add a new value source to the value represented by THIS, marking that a |
| 2024 | value comes from edge CS and (if the underlying jump function is a |
| 2025 | pass-through or an ancestor one) from a caller value SRC_VAL of a caller |
parameter described by SRC_INDEX. OFFSET is negative if the source was the
scalar value of the parameter itself; otherwise it is the offset within an
aggregate. */
| 2028 | |
| 2029 | template <typename valtype> |
| 2030 | void |
| 2031 | ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val, |
| 2032 | int src_idx, HOST_WIDE_INT offset) |
| 2033 | { |
| 2034 | ipcp_value_source<valtype> *src; |
| 2035 | |
| 2036 | src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>; |
| 2037 | src->offset = offset; |
| 2038 | src->cs = cs; |
| 2039 | src->val = src_val; |
| 2040 | src->index = src_idx; |
| 2041 | |
| 2042 | src->next = sources; |
| 2043 | sources = src; |
| 2044 | } |
| 2045 | |
/* Allocate a new ipcp_value holding a tree constant, initialize its value to
CST, its self-recursion generation level to SAME_LAT_GEN_LEVEL and clear all
other fields. */
| 2048 | |
| 2049 | static ipcp_value<tree> * |
| 2050 | allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level) |
| 2051 | { |
| 2052 | ipcp_value<tree> *val; |
| 2053 | |
| 2054 | val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>(); |
| 2055 | val->value = cst; |
| 2056 | val->self_recursion_generated_level = same_lat_gen_level; |
| 2057 | return val; |
| 2058 | } |
| 2059 | |
/* Allocate a new ipcp_value holding a polymorphic context, initialize its
value to CTX, its self-recursion generation level to SAME_LAT_GEN_LEVEL and
clear all other fields. */
| 2062 | |
| 2063 | static ipcp_value<ipa_polymorphic_call_context> * |
| 2064 | allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx, |
| 2065 | unsigned same_lat_gen_level) |
| 2066 | { |
| 2067 | ipcp_value<ipa_polymorphic_call_context> *val; |
| 2068 | |
| 2069 | val = new (ipcp_poly_ctx_values_pool.allocate ()) |
| 2070 | ipcp_value<ipa_polymorphic_call_context>(); |
| 2071 | val->value = ctx; |
| 2072 | val->self_recursion_generated_level = same_lat_gen_level; |
| 2073 | return val; |
| 2074 | } |
| 2075 | |
| 2076 | /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS, |
SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
| 2078 | meaning. OFFSET -1 means the source is scalar and not a part of an |
| 2079 | aggregate. If non-NULL, VAL_P records address of existing or newly added |
| 2080 | ipcp_value. |
| 2081 | |
| 2082 | If the value is generated for a self-recursive call as a result of an |
| 2083 | arithmetic pass-through jump-function acting on a value in the same lattice, |
| 2084 | SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be |
| 2085 | zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */ |
| 2086 | |
| 2087 | template <typename valtype> |
| 2088 | bool |
| 2089 | ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, |
| 2090 | ipcp_value<valtype> *src_val, |
| 2091 | int src_idx, HOST_WIDE_INT offset, |
| 2092 | ipcp_value<valtype> **val_p, |
| 2093 | unsigned same_lat_gen_level) |
| 2094 | { |
| 2095 | ipcp_value<valtype> *val, *last_val = NULL; |
| 2096 | |
| 2097 | if (val_p) |
| 2098 | *val_p = NULL; |
| 2099 | |
| 2100 | if (bottom) |
| 2101 | return false; |
| 2102 | |
| 2103 | for (val = values; val; last_val = val, val = val->next) |
| 2104 | if (values_equal_for_ipcp_p (val->value, newval)) |
| 2105 | { |
| 2106 | if (val_p) |
| 2107 | *val_p = val; |
| 2108 | |
| 2109 | if (val->self_recursion_generated_level < same_lat_gen_level) |
| 2110 | val->self_recursion_generated_level = same_lat_gen_level; |
| 2111 | |
| 2112 | if (ipa_edge_within_scc (cs)) |
| 2113 | { |
| 2114 | ipcp_value_source<valtype> *s; |
| 2115 | for (s = val->sources; s; s = s->next) |
| 2116 | if (s->cs == cs && s->val == src_val) |
| 2117 | break; |
| 2118 | if (s) |
| 2119 | return false; |
| 2120 | } |
| 2121 | |
| 2122 | val->add_source (cs, src_val, src_idx, offset); |
| 2123 | return false; |
| 2124 | } |
| 2125 | |
| 2126 | if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl, |
| 2127 | param_ipa_cp_value_list_size)) |
| 2128 | { |
| 2129 | /* We can only free sources, not the values themselves, because sources |
| 2130 | of other values in this SCC might point to them. */ |
| 2131 | for (val = values; val; val = val->next) |
| 2132 | { |
| 2133 | while (val->sources) |
| 2134 | { |
| 2135 | ipcp_value_source<valtype> *src = val->sources; |
| 2136 | val->sources = src->next; |
ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
| 2138 | } |
| 2139 | } |
| 2140 | values = NULL; |
| 2141 | return set_to_bottom (); |
| 2142 | } |
| 2143 | |
| 2144 | values_count++; |
| 2145 | val = allocate_and_init_ipcp_value (newval, same_lat_gen_level); |
| 2146 | val->add_source (cs, src_val, src_idx, offset); |
| 2147 | val->next = NULL; |
| 2148 | |
/* Add the new value to the end of the value list, which can reduce the
number of iterations of the propagation stage for recursive functions. */
| 2151 | if (last_val) |
| 2152 | last_val->next = val; |
| 2153 | else |
| 2154 | values = val; |
| 2155 | |
| 2156 | if (val_p) |
| 2157 | *val_p = val; |
| 2158 | |
| 2159 | return true; |
| 2160 | } |
| 2161 | |
| 2162 | /* A helper function that returns result of operation specified by OPCODE on |
| 2163 | the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the |
| 2164 | value of SRC_VAL. If the operation is binary, OPND2 is a constant value |
| 2165 | acting as its second operand. OP_TYPE is the type in which the operation is |
| 2166 | performed. */ |
| 2167 | |
| 2168 | static tree |
| 2169 | get_val_across_arith_op (enum tree_code opcode, |
| 2170 | tree opnd1_type, |
| 2171 | tree opnd2, |
| 2172 | ipcp_value<tree> *src_val, |
| 2173 | tree op_type) |
| 2174 | { |
| 2175 | tree opnd1 = src_val->value; |
| 2176 | |
/* Skip source values that are incompatible with the specified type. */
| 2178 | if (opnd1_type |
| 2179 | && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) |
| 2180 | return NULL_TREE; |
| 2181 | |
return ipa_get_jf_arith_result (opcode, opnd1, opnd2, op_type);
| 2183 | } |
| 2184 | |
| 2185 | /* Propagate values through an arithmetic transformation described by a jump |
| 2186 | function associated with edge CS, taking values from SRC_LAT and putting |
| 2187 | them into DEST_LAT. OPND1_TYPE, if non-NULL, is the expected type for the |
| 2188 | values in SRC_LAT. OPND2 is a constant value if transformation is a binary |
| 2189 | operation. SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes |
| 2190 | lattice of a part of an aggregate, otherwise it should be -1. SRC_IDX is |
| 2191 | the index of the source parameter. OP_TYPE is the type in which the |
| 2192 | operation is performed and can be NULL when OPCODE is NOP_EXPR. RES_TYPE is |
| 2193 | the value type of result being propagated into. Return true if DEST_LAT |
| 2194 | changed. */ |
| 2195 | |
| 2196 | static bool |
| 2197 | propagate_vals_across_arith_jfunc (cgraph_edge *cs, |
| 2198 | enum tree_code opcode, |
| 2199 | tree opnd1_type, |
| 2200 | tree opnd2, |
| 2201 | ipcp_lattice<tree> *src_lat, |
| 2202 | ipcp_lattice<tree> *dest_lat, |
| 2203 | HOST_WIDE_INT src_offset, |
| 2204 | int src_idx, |
| 2205 | tree op_type, |
| 2206 | tree res_type) |
| 2207 | { |
| 2208 | ipcp_value<tree> *src_val; |
| 2209 | bool ret = false; |
| 2210 | |
/* Due to circular dependencies, propagating within an SCC through arithmetic
transformation would create an infinite number of values. But for a
self-feeding recursive function, we can allow propagation up to a limited
count, which enables a simple kind of recursive function versioning.
In any other scenario, we just make the lattices bottom. */
| 2216 | if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) |
| 2217 | { |
| 2218 | int i; |
| 2219 | |
| 2220 | int max_recursive_depth = opt_for_fn(cs->caller->decl, |
| 2221 | param_ipa_cp_max_recursive_depth); |
| 2222 | if (src_lat != dest_lat || max_recursive_depth < 1) |
| 2223 | return dest_lat->set_contains_variable (); |
| 2224 | |
/* No benefit if the probability of recursive execution is low. */
| 2226 | if (cs->sreal_frequency () * 100 |
| 2227 | <= ((sreal) 1) * opt_for_fn (cs->caller->decl, |
| 2228 | param_ipa_cp_min_recursive_probability)) |
| 2229 | return dest_lat->set_contains_variable (); |
| 2230 | |
| 2231 | auto_vec<ipcp_value<tree> *, 8> val_seeds; |
| 2232 | |
| 2233 | for (src_val = src_lat->values; src_val; src_val = src_val->next) |
| 2234 | { |
/* We currently do not use self-recursively generated values as
propagation sources. This is quite conservative, but it avoids an
explosion of the lattice's value space, especially when one recursive
function calls another recursive one. */
| 2239 | if (src_val->self_recursion_generated_p ()) |
| 2240 | { |
| 2241 | ipcp_value_source<tree> *s; |
| 2242 | |
| 2243 | /* If the lattice has already been propagated for the call site, |
| 2244 | no need to do that again. */ |
| 2245 | for (s = src_val->sources; s; s = s->next) |
| 2246 | if (s->cs == cs) |
| 2247 | return dest_lat->set_contains_variable (); |
| 2248 | } |
| 2249 | else |
| 2250 | val_seeds.safe_push (obj: src_val); |
| 2251 | } |
| 2252 | |
| 2253 | gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size); |
| 2254 | |
| 2255 | /* Recursively generate lattice values with a limited count. */ |
| 2256 | FOR_EACH_VEC_ELT (val_seeds, i, src_val) |
| 2257 | { |
| 2258 | for (int j = 1; j < max_recursive_depth; j++) |
| 2259 | { |
| 2260 | tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, |
| 2261 | src_val, op_type); |
cstval = ipacp_value_safe_for_type (res_type, cstval);
if (!cstval)
break;

ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset, &src_val, j);
| 2268 | gcc_checking_assert (src_val); |
| 2269 | } |
| 2270 | } |
| 2271 | ret |= dest_lat->set_contains_variable (); |
| 2272 | } |
| 2273 | else |
| 2274 | for (src_val = src_lat->values; src_val; src_val = src_val->next) |
| 2275 | { |
/* We do not use self-recursively generated values as propagation
sources; otherwise it would be easy to overflow the value space of a
normal lattice. */
| 2279 | if (src_val->self_recursion_generated_p ()) |
| 2280 | { |
| 2281 | ret |= dest_lat->set_contains_variable (); |
| 2282 | continue; |
| 2283 | } |
| 2284 | |
| 2285 | tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, |
| 2286 | src_val, op_type); |
cstval = ipacp_value_safe_for_type (res_type, cstval);
if (cstval)
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset);
| 2291 | else |
| 2292 | ret |= dest_lat->set_contains_variable (); |
| 2293 | } |
| 2294 | |
| 2295 | return ret; |
| 2296 | } |
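
/* To illustrate the self-recursion handling above (a sketch with assumed
parameter values, not from the sources): for

static int f (int i)
{
if (i < 8)
return f (i + 1);
return i;
}

called from elsewhere as f (0), the recursive edge carries a pass-through
jump function with PLUS_EXPR and operand 1 whose source and destination
lattices are the same. Starting from the seed value 0, the loop above then
generates up to param_ipa_cp_max_recursive_depth - 1 further values 1, 2,
..., each with an increasing self_recursion_generated_level, which is what
later allows creating a chain of specialized clones for the recursion. */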
| 2297 | |
| 2298 | /* Propagate values through a pass-through jump function JFUNC associated with |
| 2299 | edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX |
| 2300 | is the index of the source parameter. PARM_TYPE is the type of the |
| 2301 | parameter to which the result is passed. */ |
| 2302 | |
| 2303 | static bool |
| 2304 | propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, |
| 2305 | ipcp_lattice<tree> *src_lat, |
| 2306 | ipcp_lattice<tree> *dest_lat, int src_idx, |
| 2307 | tree parm_type) |
| 2308 | { |
| 2309 | gcc_checking_assert (parm_type); |
| 2310 | enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); |
| 2311 | tree op_type = (opcode == NOP_EXPR) ? NULL_TREE |
| 2312 | : ipa_get_jf_pass_through_op_type (jfunc); |
| 2313 | return propagate_vals_across_arith_jfunc (cs, opcode, NULL_TREE, |
ipa_get_jf_pass_through_operand (jfunc),
src_lat, dest_lat, -1, src_idx, op_type,
parm_type);
| 2317 | } |
| 2318 | |
| 2319 | /* Propagate values through an ancestor jump function JFUNC associated with |
| 2320 | edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX |
| 2321 | is the index of the source parameter. */ |
| 2322 | |
| 2323 | static bool |
| 2324 | propagate_vals_across_ancestor (struct cgraph_edge *cs, |
| 2325 | struct ipa_jump_func *jfunc, |
| 2326 | ipcp_lattice<tree> *src_lat, |
| 2327 | ipcp_lattice<tree> *dest_lat, int src_idx, |
| 2328 | tree param_type) |
| 2329 | { |
| 2330 | ipcp_value<tree> *src_val; |
| 2331 | bool ret = false; |
| 2332 | |
| 2333 | if (ipa_edge_within_scc (cs)) |
| 2334 | return dest_lat->set_contains_variable (); |
| 2335 | |
| 2336 | for (src_val = src_lat->values; src_val; src_val = src_val->next) |
| 2337 | { |
tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
t = ipacp_value_safe_for_type (param_type, t);
if (t)
ret |= dest_lat->add_value (t, cs, src_val, src_idx);
| 2342 | else |
| 2343 | ret |= dest_lat->set_contains_variable (); |
| 2344 | } |
| 2345 | |
| 2346 | return ret; |
| 2347 | } |
| 2348 | |
| 2349 | /* Propagate scalar values across jump function JFUNC that is associated with |
| 2350 | edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the |
| 2351 | parameter to which the result is passed. */ |
| 2352 | |
| 2353 | static bool |
| 2354 | propagate_scalar_across_jump_function (struct cgraph_edge *cs, |
| 2355 | struct ipa_jump_func *jfunc, |
| 2356 | ipcp_lattice<tree> *dest_lat, |
| 2357 | tree param_type) |
| 2358 | { |
| 2359 | if (dest_lat->bottom) |
| 2360 | return false; |
| 2361 | |
| 2362 | if (jfunc->type == IPA_JF_CONST) |
| 2363 | { |
| 2364 | tree val = ipa_get_jf_constant (jfunc); |
val = ipacp_value_safe_for_type (param_type, val);
if (val)
return dest_lat->add_value (val, cs, NULL, 0);
| 2368 | else |
| 2369 | return dest_lat->set_contains_variable (); |
| 2370 | } |
| 2371 | else if (jfunc->type == IPA_JF_PASS_THROUGH |
| 2372 | || jfunc->type == IPA_JF_ANCESTOR) |
| 2373 | { |
| 2374 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2375 | ipcp_lattice<tree> *src_lat; |
| 2376 | int src_idx; |
| 2377 | bool ret; |
| 2378 | |
| 2379 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 2380 | src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 2381 | else |
| 2382 | src_idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 2383 | |
| 2384 | src_lat = ipa_get_scalar_lat (info: caller_info, i: src_idx); |
| 2385 | if (src_lat->bottom) |
| 2386 | return dest_lat->set_contains_variable (); |
| 2387 | |
| 2388 | /* If we would need to clone the caller and cannot, do not propagate. */ |
| 2389 | if (!ipcp_versionable_function_p (node: cs->caller) |
| 2390 | && (src_lat->contains_variable |
| 2391 | || (src_lat->values_count > 1))) |
| 2392 | return dest_lat->set_contains_variable (); |
| 2393 | |
| 2394 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 2395 | ret = propagate_vals_across_pass_through (cs, jfunc, src_lat, |
| 2396 | dest_lat, src_idx, |
| 2397 | parm_type: param_type); |
| 2398 | else |
| 2399 | ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat, |
| 2400 | src_idx, param_type); |
| 2401 | |
| 2402 | if (src_lat->contains_variable) |
| 2403 | ret |= dest_lat->set_contains_variable (); |
| 2404 | |
| 2405 | return ret; |
| 2406 | } |
| 2407 | |
| 2408 | /* TODO: We currently do not handle member method pointers in IPA-CP (we only |
| 2409 | use it for indirect inlining), we should propagate them too. */ |
| 2410 | return dest_lat->set_contains_variable (); |
| 2411 | } |
| 2412 | |
/* Propagate polymorphic contexts across jump function JFUNC that is associated
with edge CS and describes argument IDX, and put the contexts into DEST_LAT. */
| 2415 | |
| 2416 | static bool |
| 2417 | propagate_context_across_jump_function (cgraph_edge *cs, |
| 2418 | ipa_jump_func *jfunc, int idx, |
| 2419 | ipcp_lattice<ipa_polymorphic_call_context> *dest_lat) |
| 2420 | { |
| 2421 | if (dest_lat->bottom) |
| 2422 | return false; |
| 2423 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 2424 | bool ret = false; |
| 2425 | bool added_sth = false; |
| 2426 | bool type_preserved = true; |
| 2427 | |
| 2428 | ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr |
| 2429 | = ipa_get_ith_polymorhic_call_context (args, i: idx); |
| 2430 | |
| 2431 | if (edge_ctx_ptr) |
| 2432 | edge_ctx = *edge_ctx_ptr; |
| 2433 | |
| 2434 | if (jfunc->type == IPA_JF_PASS_THROUGH |
| 2435 | || jfunc->type == IPA_JF_ANCESTOR) |
| 2436 | { |
| 2437 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2438 | int src_idx; |
| 2439 | ipcp_lattice<ipa_polymorphic_call_context> *src_lat; |
| 2440 | |
| 2441 | /* TODO: Once we figure out how to propagate speculations, it will |
| 2442 | probably be a good idea to switch to speculation if type_preserved is |
| 2443 | not set instead of punting. */ |
| 2444 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 2445 | { |
| 2446 | if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) |
| 2447 | goto prop_fail; |
| 2448 | type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc); |
| 2449 | src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 2450 | } |
| 2451 | else |
| 2452 | { |
| 2453 | type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc); |
| 2454 | src_idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 2455 | } |
| 2456 | |
| 2457 | src_lat = ipa_get_poly_ctx_lat (info: caller_info, i: src_idx); |
| 2458 | /* If we would need to clone the caller and cannot, do not propagate. */ |
| 2459 | if (!ipcp_versionable_function_p (node: cs->caller) |
| 2460 | && (src_lat->contains_variable |
| 2461 | || (src_lat->values_count > 1))) |
| 2462 | goto prop_fail; |
| 2463 | |
| 2464 | ipcp_value<ipa_polymorphic_call_context> *src_val; |
| 2465 | for (src_val = src_lat->values; src_val; src_val = src_val->next) |
| 2466 | { |
| 2467 | ipa_polymorphic_call_context cur = src_val->value; |
| 2468 | |
| 2469 | if (!type_preserved) |
| 2470 | cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor); |
| 2471 | if (jfunc->type == IPA_JF_ANCESTOR) |
| 2472 | cur.offset_by (off: ipa_get_jf_ancestor_offset (jfunc)); |
| 2473 | /* TODO: In cases we know how the context is going to be used, |
| 2474 | we can improve the result by passing proper OTR_TYPE. */ |
| 2475 | cur.combine_with (edge_ctx); |
| 2476 | if (!cur.useless_p ()) |
| 2477 | { |
| 2478 | if (src_lat->contains_variable |
| 2479 | && !edge_ctx.equal_to (x: cur)) |
| 2480 | ret |= dest_lat->set_contains_variable (); |
| 2481 | ret |= dest_lat->add_value (newval: cur, cs, src_val, src_idx); |
| 2482 | added_sth = true; |
| 2483 | } |
| 2484 | } |
| 2485 | } |
| 2486 | |
| 2487 | prop_fail: |
| 2488 | if (!added_sth) |
| 2489 | { |
| 2490 | if (!edge_ctx.useless_p ()) |
| 2491 | ret |= dest_lat->add_value (newval: edge_ctx, cs); |
| 2492 | else |
| 2493 | ret |= dest_lat->set_contains_variable (); |
| 2494 | } |
| 2495 | |
| 2496 | return ret; |
| 2497 | } |
| 2498 | |
/* Propagate bits across jump function JFUNC that is associated with edge CS
and describes argument IDX, and update DEST_LATTICE accordingly. */
| 2501 | |
| 2502 | bool |
| 2503 | propagate_bits_across_jump_function (cgraph_edge *cs, int idx, |
| 2504 | ipa_jump_func *jfunc, |
| 2505 | ipcp_bits_lattice *dest_lattice) |
| 2506 | { |
| 2507 | if (dest_lattice->bottom_p ()) |
| 2508 | return false; |
| 2509 | |
| 2510 | enum availability availability; |
| 2511 | cgraph_node *callee = cs->callee->function_symbol (avail: &availability); |
| 2512 | ipa_node_params *callee_info = ipa_node_params_sum->get (node: callee); |
| 2513 | tree parm_type = ipa_get_type (info: callee_info, i: idx); |
| 2514 | |
| 2515 | /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the |
| 2516 | transform for these cases. Similarly, we can have bad type mismatches |
| 2517 | with LTO, avoid doing anything with those too. */ |
| 2518 | if (!parm_type |
| 2519 | || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type))) |
| 2520 | { |
| 2521 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
"param %i of %s is NULL or unsuitable for bits propagation\n",
| 2524 | idx, cs->callee->dump_name ()); |
| 2525 | |
| 2526 | return dest_lattice->set_to_bottom (); |
| 2527 | } |
| 2528 | |
| 2529 | if (jfunc->type == IPA_JF_PASS_THROUGH |
| 2530 | || jfunc->type == IPA_JF_ANCESTOR) |
| 2531 | { |
| 2532 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2533 | tree operand = NULL_TREE; |
| 2534 | tree op_type = NULL_TREE; |
| 2535 | enum tree_code code; |
| 2536 | unsigned src_idx; |
| 2537 | bool keep_null = false; |
| 2538 | |
| 2539 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 2540 | { |
| 2541 | code = ipa_get_jf_pass_through_operation (jfunc); |
| 2542 | src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 2543 | if (code != NOP_EXPR) |
| 2544 | { |
| 2545 | operand = ipa_get_jf_pass_through_operand (jfunc); |
| 2546 | op_type = ipa_get_jf_pass_through_op_type (jfunc); |
| 2547 | } |
| 2548 | } |
| 2549 | else |
| 2550 | { |
| 2551 | code = POINTER_PLUS_EXPR; |
| 2552 | src_idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 2553 | unsigned HOST_WIDE_INT offset |
| 2554 | = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT; |
| 2555 | keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset); |
| 2556 | operand = build_int_cstu (size_type_node, offset); |
| 2557 | } |
| 2558 | |
| 2559 | class ipcp_param_lattices *src_lats |
| 2560 | = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 2561 | |
/* Try to propagate bits even if src_lattice is bottom, as long as the jump
function is known. For example, consider:
int f (int x)
{
g (x & 0xff);
}
Assume the lattice for x is bottom; we can nevertheless propagate the
result of x & 0xff, whose bits above the low byte are known to be zero.
This gets computed during the ccp1 pass and is stored in the jump function
during the analysis stage. */
| 2571 | |
| 2572 | if (!src_lats->bits_lattice.bottom_p ()) |
| 2573 | { |
| 2574 | if (!op_type) |
| 2575 | op_type = ipa_get_type (info: caller_info, i: src_idx); |
| 2576 | |
| 2577 | unsigned precision = TYPE_PRECISION (op_type); |
| 2578 | signop sgn = TYPE_SIGN (op_type); |
| 2579 | bool drop_all_ones |
| 2580 | = keep_null && !src_lats->bits_lattice.known_nonzero_p (); |
| 2581 | |
| 2582 | return dest_lattice->meet_with (other&: src_lats->bits_lattice, precision, |
| 2583 | sgn, code, operand, drop_all_ones); |
| 2584 | } |
| 2585 | } |
| 2586 | |
| 2587 | value_range vr (parm_type); |
| 2588 | if (jfunc->m_vr) |
| 2589 | { |
| 2590 | jfunc->m_vr->get_vrange (vr); |
| 2591 | if (!vr.undefined_p () && !vr.varying_p ()) |
| 2592 | { |
| 2593 | irange_bitmask bm = vr.get_bitmask (); |
| 2594 | widest_int mask |
= widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
widest_int value
= widest_int::from (bm.value (), TYPE_SIGN (parm_type));
| 2598 | return dest_lattice->meet_with (value, mask, |
| 2599 | TYPE_PRECISION (parm_type)); |
| 2600 | } |
| 2601 | } |
| 2602 | return dest_lattice->set_to_bottom (); |
| 2603 | } |
| 2604 | |
| 2605 | /* Propagate value range across jump function JFUNC that is associated with |
| 2606 | edge CS with param of callee of PARAM_TYPE and update DEST_PLATS |
| 2607 | accordingly. */ |
| 2608 | |
| 2609 | static bool |
| 2610 | propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, |
| 2611 | class ipcp_param_lattices *dest_plats, |
| 2612 | tree param_type) |
| 2613 | { |
| 2614 | ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range; |
| 2615 | |
| 2616 | if (dest_lat->bottom_p ()) |
| 2617 | return false; |
| 2618 | |
| 2619 | if (!param_type |
| 2620 | || !ipa_vr_supported_type_p (type: param_type)) |
| 2621 | return dest_lat->set_to_bottom (); |
| 2622 | |
| 2623 | value_range vr (param_type); |
| 2624 | vr.set_varying (param_type); |
| 2625 | if (jfunc->m_vr) |
ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
param_type,
jfunc->m_vr->type ());
| 2629 | |
| 2630 | if (jfunc->type == IPA_JF_PASS_THROUGH) |
| 2631 | { |
| 2632 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2633 | int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 2634 | class ipcp_param_lattices *src_lats |
| 2635 | = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 2636 | tree operand_type = ipa_get_type (info: caller_info, i: src_idx); |
| 2637 | |
| 2638 | if (src_lats->m_value_range.bottom_p ()) |
| 2639 | return dest_lat->set_to_bottom (); |
| 2640 | |
| 2641 | if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR |
| 2642 | || !ipa_edge_within_scc (cs)) |
ipa_vr_intersect_with_arith_jfunc (vr, jfunc, cs->caller,
src_lats->m_value_range.m_vr,
operand_type, param_type);
| 2646 | } |
| 2647 | |
| 2648 | if (!vr.undefined_p () && !vr.varying_p ()) |
| 2649 | return dest_lat->meet_with (p_vr: vr); |
| 2650 | else |
| 2651 | return dest_lat->set_to_bottom (); |
| 2652 | } |
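
/* A worked example for the function above (assumed ranges): if the caller's
parameter X has the recorded range [0, 5] and the call passes X + 10 through
a PLUS_EXPR pass-through jump function, the arithmetic intersection yields
[10, 15] for the callee's parameter and dest_lat->meet_with records it; had
the combination come out VARYING, the lattice would have been set to bottom
instead. */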
| 2653 | |
| 2654 | /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches |
| 2655 | NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all |
| 2656 | other cases, return false). If there are no aggregate items, set |
| 2657 | aggs_by_ref to NEW_AGGS_BY_REF. */ |
| 2658 | |
| 2659 | static bool |
| 2660 | set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats, |
| 2661 | bool new_aggs_by_ref) |
| 2662 | { |
| 2663 | if (dest_plats->aggs) |
| 2664 | { |
| 2665 | if (dest_plats->aggs_by_ref != new_aggs_by_ref) |
| 2666 | { |
| 2667 | set_agg_lats_to_bottom (dest_plats); |
| 2668 | return true; |
| 2669 | } |
| 2670 | } |
| 2671 | else |
| 2672 | dest_plats->aggs_by_ref = new_aggs_by_ref; |
| 2673 | return false; |
| 2674 | } |
| 2675 | |
| 2676 | /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an |
| 2677 | already existing lattice for the given OFFSET and SIZE, marking all skipped |
| 2678 | lattices as containing variable and checking for overlaps. If there is no |
| 2679 | already existing lattice for the OFFSET and VAL_SIZE, create one, initialize |
| 2680 | it with offset, size and contains_variable to PRE_EXISTING, and return true, |
unless there are too many already. If there are too many, return false. If
there are overlaps, turn the whole DEST_PLATS to bottom and return false. If any
| 2683 | skipped lattices were newly marked as containing variable, set *CHANGE to |
| 2684 | true. MAX_AGG_ITEMS is the maximum number of lattices. */ |
| 2685 | |
| 2686 | static bool |
| 2687 | merge_agg_lats_step (class ipcp_param_lattices *dest_plats, |
| 2688 | HOST_WIDE_INT offset, HOST_WIDE_INT val_size, |
| 2689 | struct ipcp_agg_lattice ***aglat, |
| 2690 | bool pre_existing, bool *change, int max_agg_items) |
| 2691 | { |
| 2692 | gcc_checking_assert (offset >= 0); |
| 2693 | |
| 2694 | while (**aglat && (**aglat)->offset < offset) |
| 2695 | { |
| 2696 | if ((**aglat)->offset + (**aglat)->size > offset) |
| 2697 | { |
| 2698 | set_agg_lats_to_bottom (dest_plats); |
| 2699 | return false; |
| 2700 | } |
| 2701 | *change |= (**aglat)->set_contains_variable (); |
| 2702 | *aglat = &(**aglat)->next; |
| 2703 | } |
| 2704 | |
| 2705 | if (**aglat && (**aglat)->offset == offset) |
| 2706 | { |
| 2707 | if ((**aglat)->size != val_size) |
| 2708 | { |
| 2709 | set_agg_lats_to_bottom (dest_plats); |
| 2710 | return false; |
| 2711 | } |
| 2712 | gcc_assert (!(**aglat)->next |
| 2713 | || (**aglat)->next->offset >= offset + val_size); |
| 2714 | return true; |
| 2715 | } |
| 2716 | else |
| 2717 | { |
| 2718 | struct ipcp_agg_lattice *new_al; |
| 2719 | |
| 2720 | if (**aglat && (**aglat)->offset < offset + val_size) |
| 2721 | { |
| 2722 | set_agg_lats_to_bottom (dest_plats); |
| 2723 | return false; |
| 2724 | } |
| 2725 | if (dest_plats->aggs_count == max_agg_items) |
| 2726 | return false; |
| 2727 | dest_plats->aggs_count++; |
| 2728 | new_al = ipcp_agg_lattice_pool.allocate (); |
| 2729 | |
| 2730 | new_al->offset = offset; |
| 2731 | new_al->size = val_size; |
| 2732 | new_al->contains_variable = pre_existing; |
| 2733 | |
| 2734 | new_al->next = **aglat; |
| 2735 | **aglat = new_al; |
| 2736 | return true; |
| 2737 | } |
| 2738 | } |
| 2739 | |
| 2740 | /* Set all AGLAT and all other aggregate lattices reachable by next pointers as |
| 2741 | containing an unknown value. */ |
| 2742 | |
| 2743 | static bool |
| 2744 | set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat) |
| 2745 | { |
| 2746 | bool ret = false; |
| 2747 | while (aglat) |
| 2748 | { |
| 2749 | ret |= aglat->set_contains_variable (); |
| 2750 | aglat = aglat->next; |
| 2751 | } |
| 2752 | return ret; |
| 2753 | } |
| 2754 | |
| 2755 | /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting |
OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
| 2757 | parameter used for lattice value sources. Return true if DEST_PLATS changed |
| 2758 | in any way. */ |
| 2759 | |
| 2760 | static bool |
| 2761 | merge_aggregate_lattices (struct cgraph_edge *cs, |
| 2762 | class ipcp_param_lattices *dest_plats, |
| 2763 | class ipcp_param_lattices *src_plats, |
| 2764 | int src_idx, HOST_WIDE_INT offset_delta) |
| 2765 | { |
| 2766 | bool pre_existing = dest_plats->aggs != NULL; |
| 2767 | struct ipcp_agg_lattice **dst_aglat; |
| 2768 | bool ret = false; |
| 2769 | |
| 2770 | if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: src_plats->aggs_by_ref)) |
| 2771 | return true; |
| 2772 | if (src_plats->aggs_bottom) |
| 2773 | return set_agg_lats_contain_variable (dest_plats); |
| 2774 | if (src_plats->aggs_contain_variable) |
| 2775 | ret |= set_agg_lats_contain_variable (dest_plats); |
| 2776 | dst_aglat = &dest_plats->aggs; |
| 2777 | |
| 2778 | int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl, |
| 2779 | param_ipa_max_agg_items); |
| 2780 | for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs; |
| 2781 | src_aglat; |
| 2782 | src_aglat = src_aglat->next) |
| 2783 | { |
| 2784 | HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta; |
| 2785 | |
| 2786 | if (new_offset < 0) |
| 2787 | continue; |
if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
&dst_aglat, pre_existing, &ret, max_agg_items))
| 2790 | { |
| 2791 | struct ipcp_agg_lattice *new_al = *dst_aglat; |
| 2792 | |
| 2793 | dst_aglat = &(*dst_aglat)->next; |
| 2794 | if (src_aglat->bottom) |
| 2795 | { |
| 2796 | ret |= new_al->set_contains_variable (); |
| 2797 | continue; |
| 2798 | } |
| 2799 | if (src_aglat->contains_variable) |
| 2800 | ret |= new_al->set_contains_variable (); |
| 2801 | for (ipcp_value<tree> *val = src_aglat->values; |
| 2802 | val; |
| 2803 | val = val->next) |
ret |= new_al->add_value (val->value, cs, val, src_idx,
src_aglat->offset);
| 2806 | } |
| 2807 | else if (dest_plats->aggs_bottom) |
| 2808 | return true; |
| 2809 | } |
| 2810 | ret |= set_chain_of_aglats_contains_variable (*dst_aglat); |
| 2811 | return ret; |
| 2812 | } |
| 2813 | |
/* Determine whether there is anything to propagate from SRC_PLATS through a
pass-through JFUNC and if so, whether it conforms to the rules about
propagating values passed by reference. */
| 2817 | |
| 2818 | static bool |
| 2819 | agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats, |
| 2820 | struct ipa_jump_func *jfunc) |
| 2821 | { |
| 2822 | return src_plats->aggs |
| 2823 | && (!src_plats->aggs_by_ref |
| 2824 | || ipa_get_jf_pass_through_agg_preserved (jfunc)); |
| 2825 | } |
| 2826 | |
| 2827 | /* Propagate values through ITEM, jump function for a part of an aggregate, |
| 2828 | into corresponding aggregate lattice AGLAT. CS is the call graph edge |
| 2829 | associated with the jump function. Return true if AGLAT changed in any |
| 2830 | way. */ |
| 2831 | |
| 2832 | static bool |
| 2833 | propagate_aggregate_lattice (struct cgraph_edge *cs, |
| 2834 | struct ipa_agg_jf_item *item, |
| 2835 | struct ipcp_agg_lattice *aglat) |
| 2836 | { |
| 2837 | class ipa_node_params *caller_info; |
| 2838 | class ipcp_param_lattices *src_plats; |
| 2839 | struct ipcp_lattice<tree> *src_lat; |
| 2840 | HOST_WIDE_INT src_offset; |
| 2841 | int src_idx; |
| 2842 | tree load_type; |
| 2843 | bool ret; |
| 2844 | |
| 2845 | if (item->jftype == IPA_JF_CONST) |
| 2846 | { |
| 2847 | tree value = item->value.constant; |
| 2848 | |
| 2849 | gcc_checking_assert (is_gimple_ip_invariant (value)); |
| 2850 | return aglat->add_value (newval: value, cs, NULL, src_idx: 0); |
| 2851 | } |
| 2852 | |
| 2853 | gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH |
| 2854 | || item->jftype == IPA_JF_LOAD_AGG); |
| 2855 | |
| 2856 | caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2857 | src_idx = item->value.pass_through.formal_id; |
| 2858 | src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 2859 | |
| 2860 | if (item->jftype == IPA_JF_PASS_THROUGH) |
| 2861 | { |
| 2862 | load_type = NULL_TREE; |
| 2863 | src_lat = &src_plats->itself; |
| 2864 | src_offset = -1; |
| 2865 | } |
| 2866 | else |
| 2867 | { |
| 2868 | HOST_WIDE_INT load_offset = item->value.load_agg.offset; |
| 2869 | struct ipcp_agg_lattice *src_aglat; |
| 2870 | |
| 2871 | for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next) |
| 2872 | if (src_aglat->offset >= load_offset) |
| 2873 | break; |
| 2874 | |
| 2875 | load_type = item->value.load_agg.type; |
| 2876 | if (!src_aglat |
| 2877 | || src_aglat->offset > load_offset |
| 2878 | || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type)) |
| 2879 | || src_plats->aggs_by_ref != item->value.load_agg.by_ref) |
| 2880 | return aglat->set_contains_variable (); |
| 2881 | |
| 2882 | src_lat = src_aglat; |
| 2883 | src_offset = load_offset; |
| 2884 | } |
| 2885 | |
| 2886 | if (src_lat->bottom |
| 2887 | || (!ipcp_versionable_function_p (node: cs->caller) |
| 2888 | && !src_lat->is_single_const ())) |
| 2889 | return aglat->set_contains_variable (); |
| 2890 | |
| 2891 | ret = propagate_vals_across_arith_jfunc (cs, |
item->value.pass_through.operation,
load_type,
item->value.pass_through.operand,
src_lat, aglat,
src_offset,
src_idx,
item->value.pass_through.op_type,
item->type);
| 2900 | |
| 2901 | if (src_lat->contains_variable) |
| 2902 | ret |= aglat->set_contains_variable (); |
| 2903 | |
| 2904 | return ret; |
| 2905 | } |
| 2906 | |
/* Propagate aggregate values across jump function JFUNC that is associated
with edge CS and put the values into DEST_PLATS. */
| 2909 | |
| 2910 | static bool |
| 2911 | propagate_aggs_across_jump_function (struct cgraph_edge *cs, |
| 2912 | struct ipa_jump_func *jfunc, |
| 2913 | class ipcp_param_lattices *dest_plats) |
| 2914 | { |
| 2915 | bool ret = false; |
| 2916 | |
| 2917 | if (dest_plats->aggs_bottom) |
| 2918 | return false; |
| 2919 | |
| 2920 | if (jfunc->type == IPA_JF_PASS_THROUGH |
| 2921 | && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) |
| 2922 | { |
| 2923 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2924 | int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 2925 | class ipcp_param_lattices *src_plats; |
| 2926 | |
| 2927 | src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 2928 | if (agg_pass_through_permissible_p (src_plats, jfunc)) |
| 2929 | { |
| 2930 | /* Currently we do not produce clobber aggregate jump |
| 2931 | functions, replace with merging when we do. */ |
| 2932 | gcc_assert (!jfunc->agg.items); |
| 2933 | ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, |
src_idx, 0);
| 2935 | return ret; |
| 2936 | } |
| 2937 | } |
| 2938 | else if (jfunc->type == IPA_JF_ANCESTOR |
| 2939 | && ipa_get_jf_ancestor_agg_preserved (jfunc)) |
| 2940 | { |
| 2941 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 2942 | int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 2943 | class ipcp_param_lattices *src_plats; |
| 2944 | |
| 2945 | src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 2946 | if (src_plats->aggs && src_plats->aggs_by_ref) |
| 2947 | { |
| 2948 | /* Currently we do not produce clobber aggregate jump |
| 2949 | functions, replace with merging when we do. */ |
| 2950 | gcc_assert (!jfunc->agg.items); |
| 2951 | ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx, |
| 2952 | offset_delta: ipa_get_jf_ancestor_offset (jfunc)); |
| 2953 | } |
| 2954 | else if (!src_plats->aggs_by_ref) |
| 2955 | ret |= set_agg_lats_to_bottom (dest_plats); |
| 2956 | else |
| 2957 | ret |= set_agg_lats_contain_variable (dest_plats); |
| 2958 | return ret; |
| 2959 | } |
| 2960 | |
| 2961 | if (jfunc->agg.items) |
| 2962 | { |
| 2963 | bool pre_existing = dest_plats->aggs != NULL; |
| 2964 | struct ipcp_agg_lattice **aglat = &dest_plats->aggs; |
| 2965 | struct ipa_agg_jf_item *item; |
| 2966 | int i; |
| 2967 | |
| 2968 | if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: jfunc->agg.by_ref)) |
| 2969 | return true; |
| 2970 | |
| 2971 | int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl, |
| 2972 | param_ipa_max_agg_items); |
| 2973 | FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item) |
| 2974 | { |
| 2975 | HOST_WIDE_INT val_size; |
| 2976 | |
| 2977 | if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN) |
| 2978 | continue; |
| 2979 | val_size = tree_to_shwi (TYPE_SIZE (item->type)); |
| 2980 | |
| 2981 | if (merge_agg_lats_step (dest_plats, offset: item->offset, val_size, |
| 2982 | aglat: &aglat, pre_existing, change: &ret, max_agg_items)) |
| 2983 | { |
| 2984 | ret |= propagate_aggregate_lattice (cs, item, aglat: *aglat); |
| 2985 | aglat = &(*aglat)->next; |
| 2986 | } |
| 2987 | else if (dest_plats->aggs_bottom) |
| 2988 | return true; |
| 2989 | } |
| 2990 | |
| 2991 | ret |= set_chain_of_aglats_contains_variable (*aglat); |
| 2992 | } |
| 2993 | else |
| 2994 | ret |= set_agg_lats_contain_variable (dest_plats); |
| 2995 | |
| 2996 | return ret; |
| 2997 | } |
| 2998 | |
| 2999 | /* Return true if on the way from CS->caller to the final (non-alias and |
| 3000 | non-thunk) destination, the call passes through a thunk. */ |
| 3001 | |
| 3002 | static bool |
| 3003 | call_passes_through_thunk (cgraph_edge *cs) |
| 3004 | { |
| 3005 | cgraph_node *alias_or_thunk = cs->callee; |
| 3006 | while (alias_or_thunk->alias) |
| 3007 | alias_or_thunk = alias_or_thunk->get_alias_target (); |
| 3008 | return alias_or_thunk->thunk; |
| 3009 | } |
| 3010 | |
| 3011 | /* Propagate constants from the caller to the callee of CS. Return true if |
| 3012 | any lattice of the callee changed as a result. */ |
| 3013 | |
| 3014 | static bool |
| 3015 | propagate_constants_across_call (struct cgraph_edge *cs) |
| 3016 | { |
| 3017 | class ipa_node_params *callee_info; |
| 3018 | enum availability availability; |
| 3019 | cgraph_node *callee; |
| 3020 | class ipa_edge_args *args; |
| 3021 | bool ret = false; |
| 3022 | int i, args_count, parms_count; |
| 3023 | |
| 3024 | callee = cs->callee->function_symbol (avail: &availability); |
| 3025 | if (!callee->definition) |
| 3026 | return false; |
| 3027 | gcc_checking_assert (callee->has_gimple_body_p ()); |
| 3028 | callee_info = ipa_node_params_sum->get (node: callee); |
| 3029 | if (!callee_info) |
| 3030 | return false; |
| 3031 | |
| 3032 | args = ipa_edge_args_sum->get (edge: cs); |
| 3033 | parms_count = ipa_get_param_count (info: callee_info); |
| 3034 | if (parms_count == 0) |
| 3035 | return false; |
| 3036 | if (!args |
| 3037 | || !opt_for_fn (cs->caller->decl, flag_ipa_cp) |
| 3038 | || !opt_for_fn (cs->caller->decl, optimize)) |
| 3039 | { |
| 3040 | for (i = 0; i < parms_count; i++) |
| 3041 | ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info, |
| 3042 | i)); |
| 3043 | return ret; |
| 3044 | } |
| 3045 | args_count = ipa_get_cs_argument_count (args); |
| 3046 | |
| 3047 | /* If this call goes through a thunk we must not propagate to the first (0th) |
| 3048 | parameter. However, we might need to uncover a thunk from below a series |
| 3049 | of aliases first. */ |
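| | /* (A typical case is a this-adjusting thunk: the first argument that the |
| | final callee sees is offset from what the caller passed, so the jump |
| | function for the 0th parameter cannot be trusted.) */ |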
| 3050 | if (call_passes_through_thunk (cs)) |
| 3051 | { |
| 3052 | ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info, |
| 3053 | i: 0)); |
| 3054 | i = 1; |
| 3055 | } |
| 3056 | else |
| 3057 | i = 0; |
| 3058 | |
| 3059 | for (; (i < args_count) && (i < parms_count); i++) |
| 3060 | { |
| 3061 | struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i); |
| 3062 | class ipcp_param_lattices *dest_plats; |
| 3063 | tree param_type = ipa_get_type (info: callee_info, i); |
| 3064 | |
| 3065 | dest_plats = ipa_get_parm_lattices (info: callee_info, i); |
| 3066 | if (availability == AVAIL_INTERPOSABLE) |
| 3067 | ret |= set_all_contains_variable (dest_plats); |
| 3068 | else |
| 3069 | { |
| 3070 | ret |= propagate_scalar_across_jump_function (cs, jfunc: jump_func, |
| 3071 | dest_lat: &dest_plats->itself, |
| 3072 | param_type); |
| 3073 | ret |= propagate_context_across_jump_function (cs, jfunc: jump_func, idx: i, |
| 3074 | dest_lat: &dest_plats->ctxlat); |
| 3075 | ret |
| 3076 | |= propagate_bits_across_jump_function (cs, idx: i, jfunc: jump_func, |
| 3077 | dest_lattice: &dest_plats->bits_lattice); |
| 3078 | ret |= propagate_aggs_across_jump_function (cs, jfunc: jump_func, |
| 3079 | dest_plats); |
| 3080 | if (opt_for_fn (callee->decl, flag_ipa_vrp)) |
| 3081 | ret |= propagate_vr_across_jump_function (cs, jfunc: jump_func, |
| 3082 | dest_plats, param_type); |
| 3083 | else |
| 3084 | ret |= dest_plats->m_value_range.set_to_bottom (); |
| 3085 | } |
| 3086 | } |
| 3087 | for (; i < parms_count; i++) |
| 3088 | ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info, i)); |
| 3089 | |
| 3090 | return ret; |
| 3091 | } |
| 3092 | |
| 3093 | /* If an indirect edge IE can be turned into a direct one based on constants |
| 3094 | in KNOWN_CSTS, polymorphic call contexts in KNOWN_CONTEXTS and aggregate |
| 3095 | values in AVS (any of which may be empty), return the destination. Store |
| 3096 | into *SPECULATIVE whether the returned target is only a speculative guess. */ |
| 3097 | |
| 3098 | static tree |
| 3099 | ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, |
| 3100 | const vec<tree> &known_csts, |
| 3101 | const vec<ipa_polymorphic_call_context> &known_contexts, |
| 3102 | const ipa_argagg_value_list &avs, |
| 3103 | bool *speculative) |
| 3104 | { |
| 3105 | int param_index = ie->indirect_info->param_index; |
| 3106 | HOST_WIDE_INT anc_offset; |
| 3107 | tree t = NULL; |
| 3108 | tree target = NULL; |
| 3109 | |
| 3110 | *speculative = false; |
| 3111 | |
| 3112 | if (param_index == -1) |
| 3113 | return NULL_TREE; |
| 3114 | |
| 3115 | if (!ie->indirect_info->polymorphic) |
| 3116 | { |
| 3117 | tree t = NULL; |
| 3118 | |
| 3119 | if (ie->indirect_info->agg_contents) |
| 3120 | { |
| 3121 | t = NULL; |
| 3122 | if ((unsigned) param_index < known_csts.length () |
| 3123 | && known_csts[param_index]) |
| 3124 | t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index], |
| 3125 | offset: ie->indirect_info->offset, |
| 3126 | by_ref: ie->indirect_info->by_ref); |
| 3127 | |
| 3128 | if (!t && ie->indirect_info->guaranteed_unmodified) |
| 3129 | t = avs.get_value (index: param_index, |
| 3130 | unit_offset: ie->indirect_info->offset / BITS_PER_UNIT, |
| 3131 | by_ref: ie->indirect_info->by_ref); |
| 3132 | } |
| 3133 | else if ((unsigned) param_index < known_csts.length ()) |
| 3134 | t = known_csts[param_index]; |
| 3135 | |
| 3136 | if (t |
| 3137 | && TREE_CODE (t) == ADDR_EXPR |
| 3138 | && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL) |
| 3139 | return TREE_OPERAND (t, 0); |
| 3140 | else |
| 3141 | return NULL_TREE; |
| 3142 | } |
| 3143 | |
| 3144 | if (!opt_for_fn (ie->caller->decl, flag_devirtualize)) |
| 3145 | return NULL_TREE; |
| 3146 | |
| 3147 | gcc_assert (!ie->indirect_info->agg_contents); |
| 3148 | gcc_assert (!ie->indirect_info->by_ref); |
| 3149 | anc_offset = ie->indirect_info->offset; |
| 3150 | |
| 3151 | t = NULL; |
| 3152 | |
| 3153 | if ((unsigned) param_index < known_csts.length () |
| 3154 | && known_csts[param_index]) |
| 3155 | t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index], |
| 3156 | offset: ie->indirect_info->offset, by_ref: true); |
| 3157 | |
| 3158 | /* Try to work out the value of the virtual table pointer from replacements |
| 3159 | or known aggregate values. */ |
| 3160 | if (!t) |
| 3161 | t = avs.get_value (index: param_index, |
| 3162 | unit_offset: ie->indirect_info->offset / BITS_PER_UNIT, |
| 3163 | by_ref: true); |
| 3164 | |
| 3165 | /* If we found the virtual table pointer, look up the target. */ |
| 3166 | if (t) |
| 3167 | { |
| 3168 | tree vtable; |
| 3169 | unsigned HOST_WIDE_INT offset; |
| 3170 | if (vtable_pointer_value_to_vtable (t, &vtable, &offset)) |
| 3171 | { |
| 3172 | bool can_refer; |
| 3173 | target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token, |
| 3174 | vtable, offset, can_refer: &can_refer); |
| 3175 | if (can_refer) |
| 3176 | { |
| 3177 | if (!target |
| 3178 | || fndecl_built_in_p (node: target, name1: BUILT_IN_UNREACHABLE) |
| 3179 | || !possible_polymorphic_call_target_p |
| 3180 | (e: ie, n: cgraph_node::get (decl: target))) |
| 3181 | { |
| 3182 | /* Do not speculate builtin_unreachable, it is stupid! */ |
| 3183 | if (ie->indirect_info->vptr_changed) |
| 3184 | return NULL; |
| 3185 | target = ipa_impossible_devirt_target (ie, target); |
| 3186 | } |
| 3187 | *speculative = ie->indirect_info->vptr_changed; |
| 3188 | if (!*speculative) |
| 3189 | return target; |
| 3190 | } |
| 3191 | } |
| 3192 | } |
| 3193 | |
| 3194 | /* Do we know the constant value of the pointer? */ |
| 3195 | if (!t && (unsigned) param_index < known_csts.length ()) |
| 3196 | t = known_csts[param_index]; |
| 3197 | |
| 3198 | gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO); |
| 3199 | |
| 3200 | ipa_polymorphic_call_context context; |
| 3201 | if (known_contexts.length () > (unsigned int) param_index) |
| 3202 | { |
| 3203 | context = known_contexts[param_index]; |
| 3204 | context.offset_by (off: anc_offset); |
| 3205 | if (ie->indirect_info->vptr_changed) |
| 3206 | context.possible_dynamic_type_change (ie->in_polymorphic_cdtor, |
| 3207 | otr_type: ie->indirect_info->otr_type); |
| 3208 | if (t) |
| 3209 | { |
| 3210 | ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context |
| 3211 | (t, ie->indirect_info->otr_type, anc_offset); |
| 3212 | if (!ctx2.useless_p ()) |
| 3213 | context.combine_with (ctx2, otr_type: ie->indirect_info->otr_type); |
| 3214 | } |
| 3215 | } |
| 3216 | else if (t) |
| 3217 | { |
| 3218 | context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type, |
| 3219 | anc_offset); |
| 3220 | if (ie->indirect_info->vptr_changed) |
| 3221 | context.possible_dynamic_type_change (ie->in_polymorphic_cdtor, |
| 3222 | otr_type: ie->indirect_info->otr_type); |
| 3223 | } |
| 3224 | else |
| 3225 | return NULL_TREE; |
| 3226 | |
| 3227 | vec <cgraph_node *>targets; |
| 3228 | bool final; |
| 3229 | |
| 3230 | targets = possible_polymorphic_call_targets |
| 3231 | (ie->indirect_info->otr_type, |
| 3232 | ie->indirect_info->otr_token, |
| 3233 | context, completep: &final); |
| 3234 | if (!final || targets.length () > 1) |
| 3235 | { |
| 3236 | struct cgraph_node *node; |
| 3237 | if (*speculative) |
| 3238 | return target; |
| 3239 | if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively) |
| 3240 | || ie->speculative || !ie->maybe_hot_p ()) |
| 3241 | return NULL; |
| 3242 | node = try_speculative_devirtualization (ie->indirect_info->otr_type, |
| 3243 | ie->indirect_info->otr_token, |
| 3244 | context); |
| 3245 | if (node) |
| 3246 | { |
| 3247 | *speculative = true; |
| 3248 | target = node->decl; |
| 3249 | } |
| 3250 | else |
| 3251 | return NULL; |
| 3252 | } |
| 3253 | else |
| 3254 | { |
| 3255 | *speculative = false; |
| 3256 | if (targets.length () == 1) |
| 3257 | target = targets[0]->decl; |
| 3258 | else |
| 3259 | target = ipa_impossible_devirt_target (ie, NULL_TREE); |
| 3260 | } |
| 3261 | |
| 3262 | if (target && !possible_polymorphic_call_target_p (e: ie, |
| 3263 | n: cgraph_node::get (decl: target))) |
| 3264 | { |
| 3265 | if (*speculative) |
| 3266 | return NULL; |
| 3267 | target = ipa_impossible_devirt_target (ie, target); |
| 3268 | } |
| 3269 | |
| 3270 | return target; |
| 3271 | } |
| 3272 | |
| 3273 | /* If an indirect edge IE can be turned into a direct one based on data in |
| 3274 | AVALS, return the destination. Store into *SPECULATIVE a boolean determining |
| 3275 | whether the discovered target is only a speculative guess. */ |
| 3276 | |
| 3277 | tree |
| 3278 | ipa_get_indirect_edge_target (struct cgraph_edge *ie, |
| 3279 | ipa_call_arg_values *avals, |
| 3280 | bool *speculative) |
| 3281 | { |
| 3282 | ipa_argagg_value_list avl (avals); |
| 3283 | return ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals, |
| 3284 | known_contexts: avals->m_known_contexts, |
| 3285 | avs: avl, speculative); |
| 3286 | } |
| 3287 | |
| 3288 | /* Calculate devirtualization time bonus for NODE, assuming we know information |
| 3289 | about arguments stored in AVALS. */ |
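| | /* For illustration only, with invented numbers: if the known arguments let an |
| | indirect call be resolved to a callee of 40 instructions while the callee's |
| | param_max_inline_insns_auto happens to be 400, the call falls into the |
| | smallest-size bucket below and contributes 1 + 31 = 32 to the bonus; if the |
| | resolution were only speculative, the same bucket would give 1 + 31/2 = 16. */ |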
| 3290 | |
| 3291 | static int |
| 3292 | devirtualization_time_bonus (struct cgraph_node *node, |
| 3293 | ipa_auto_call_arg_values *avals) |
| 3294 | { |
| 3295 | struct cgraph_edge *ie; |
| 3296 | int res = 0; |
| 3297 | |
| 3298 | for (ie = node->indirect_calls; ie; ie = ie->next_callee) |
| 3299 | { |
| 3300 | struct cgraph_node *callee; |
| 3301 | class ipa_fn_summary *isummary; |
| 3302 | enum availability avail; |
| 3303 | tree target; |
| 3304 | bool speculative; |
| 3305 | |
| 3306 | ipa_argagg_value_list avl (avals); |
| 3307 | target = ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals, |
| 3308 | known_contexts: avals->m_known_contexts, |
| 3309 | avs: avl, speculative: &speculative); |
| 3310 | if (!target) |
| 3311 | continue; |
| 3312 | |
| 3313 | /* Only bare minimum benefit for clearly un-inlineable targets. */ |
| 3314 | res += 1; |
| 3315 | callee = cgraph_node::get (decl: target); |
| 3316 | if (!callee || !callee->definition) |
| 3317 | continue; |
| 3318 | callee = callee->function_symbol (avail: &avail); |
| 3319 | if (avail < AVAIL_AVAILABLE) |
| 3320 | continue; |
| 3321 | isummary = ipa_fn_summaries->get (node: callee); |
| 3322 | if (!isummary || !isummary->inlinable) |
| 3323 | continue; |
| 3324 | |
| 3325 | int size = ipa_size_summaries->get (node: callee)->size; |
| 3326 | /* FIXME: The values below need re-considering and perhaps also |
| 3327 | integrating into the cost metrics, at least in some very basic way. */ |
| 3328 | int max_inline_insns_auto |
| 3329 | = opt_for_fn (callee->decl, param_max_inline_insns_auto); |
| 3330 | if (size <= max_inline_insns_auto / 4) |
| 3331 | res += 31 / ((int)speculative + 1); |
| 3332 | else if (size <= max_inline_insns_auto / 2) |
| 3333 | res += 15 / ((int)speculative + 1); |
| 3334 | else if (size <= max_inline_insns_auto |
| 3335 | || DECL_DECLARED_INLINE_P (callee->decl)) |
| 3336 | res += 7 / ((int)speculative + 1); |
| 3337 | } |
| 3338 | |
| 3339 | return res; |
| 3340 | } |
| 3341 | |
| 3342 | /* Return time bonus incurred because of hints stored in ESTIMATES. */ |
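| | /* A small worked example with made-up inputs: if param_ipa_cp_loop_hint_bonus |
| | is 64 and ESTIMATES carries the loop_iterations hint with two loops whose |
| | iteration counts become known, the result is 64 (the flat bonus for having |
| | a loop hint at all) + 2 * 64 = 192. */ |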
| 3343 | |
| 3344 | static int |
| 3345 | hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates) |
| 3346 | { |
| 3347 | int result = 0; |
| 3348 | ipa_hints hints = estimates.hints; |
| 3349 | if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride)) |
| 3350 | result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); |
| 3351 | |
| 3352 | sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); |
| 3353 | |
| 3354 | if (hints & INLINE_HINT_loop_iterations) |
| 3355 | result += (estimates.loops_with_known_iterations * bonus_for_one).to_int (); |
| 3356 | |
| 3357 | if (hints & INLINE_HINT_loop_stride) |
| 3358 | result += (estimates.loops_with_known_strides * bonus_for_one).to_int (); |
| 3359 | |
| 3360 | return result; |
| 3361 | } |
| 3362 | |
| 3363 | /* If there is a reason to penalize the function described by INFO in the |
| 3364 | cloning goodness evaluation, do so. */ |
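| | /* For instance, assuming a hypothetical param_ipa_cp_recursion_penalty of 40, |
| | a node that belongs to an SCC which is not just the node calling itself keeps |
| | only (100 - 40)% = 60% of its evaluation; the single-call penalty, if any, |
| | is applied on top in the same multiplicative way. */ |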
| 3365 | |
| 3366 | static inline sreal |
| 3367 | incorporate_penalties (cgraph_node *node, ipa_node_params *info, |
| 3368 | sreal evaluation) |
| 3369 | { |
| 3370 | if (info->node_within_scc && !info->node_is_self_scc) |
| 3371 | evaluation = (evaluation |
| 3372 | * (100 - opt_for_fn (node->decl, |
| 3373 | param_ipa_cp_recursion_penalty))) / 100; |
| 3374 | |
| 3375 | if (info->node_calling_single_call) |
| 3376 | evaluation = (evaluation |
| 3377 | * (100 - opt_for_fn (node->decl, |
| 3378 | param_ipa_cp_single_call_penalty))) |
| 3379 | / 100; |
| 3380 | |
| 3381 | return evaluation; |
| 3382 | } |
| 3383 | |
| 3384 | /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT |
| 3385 | and SIZE_COST, the summed frequency (FREQ_SUM) and IPA profile count (COUNT_SUM) |
| 3386 | of incoming edges, and whether any caller lacks IPA profile information (CALLED_WITHOUT_IPA_PROFILE). */ |
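| | /* An illustrative calculation with invented inputs for the frequency-based |
| | fallback used when IPA execution counts are missing or inconclusive: a time |
| | benefit of 4, a size cost of 20 and a summed caller frequency of 2 give |
| | (4 * 2) / 20 = 0.4 before penalties, which after scaling by 1000 becomes 400 |
| | and is compared against param_ipa_cp_eval_threshold. When nonzero IPA counts |
| | are available, the count-based estimate earlier in the function is tried |
| | first. */ |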
| 3387 | |
| 3388 | static bool |
| 3389 | good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit, |
| 3390 | sreal freq_sum, profile_count count_sum, |
| 3391 | int size_cost, bool called_without_ipa_profile) |
| 3392 | { |
| 3393 | gcc_assert (count_sum.ipa () == count_sum); |
| 3394 | if (time_benefit == 0 |
| 3395 | || !opt_for_fn (node->decl, flag_ipa_cp_clone) |
| 3396 | || node->optimize_for_size_p () |
| 3397 | /* If every call has IPA profile information and none of them was executed |
| 3398 | during profiling, we do not want to clone. */ |
| 3399 | || (!called_without_ipa_profile && !count_sum.nonzero_p ())) |
| 3400 | return false; |
| 3401 | |
| 3402 | gcc_assert (size_cost > 0); |
| 3403 | |
| 3404 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 3405 | int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold); |
| 3406 | /* If we know the IPA execution counts, we can estimate the overall |
| 3407 | speedup of the program. */ |
| 3408 | if (count_sum.nonzero_p ()) |
| 3409 | { |
| 3410 | profile_count saved_time = count_sum * time_benefit; |
| 3411 | sreal evaluation = saved_time.to_sreal_scale (in: profile_count::one ()) |
| 3412 | / size_cost; |
| 3413 | evaluation = incorporate_penalties (node, info, evaluation); |
| 3414 | |
| 3415 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3416 | { |
| 3417 | fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, " |
| 3418 | "size: %i, count_sum: " , time_benefit.to_double (), |
| 3419 | size_cost); |
| 3420 | count_sum.dump (f: dump_file); |
| 3421 | fprintf (stream: dump_file, format: ", overall time saved: " ); |
| 3422 | saved_time.dump (f: dump_file); |
| 3423 | fprintf (stream: dump_file, format: "%s%s) -> evaluation: %.2f, threshold: %i\n" , |
| 3424 | info->node_within_scc |
| 3425 | ? (info->node_is_self_scc ? ", self_scc" : ", scc" ) : "" , |
| 3426 | info->node_calling_single_call ? ", single_call" : "" , |
| 3427 | evaluation.to_double (), eval_threshold); |
| 3428 | } |
| 3429 | gcc_checking_assert (saved_time == saved_time.ipa ()); |
| 3430 | if (!maybe_hot_count_p (NULL, saved_time)) |
| 3431 | { |
| 3432 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3433 | fprintf (stream: dump_file, format: " not cloning: time saved is not hot\n" ); |
| 3434 | } |
| 3435 | /* Evaluation approximately corresponds to time saved per instruction |
| 3436 | introduced. This is likely almost always going to be true, since we |
| 3437 | already checked that time saved is large enough to be considered |
| 3438 | hot. */ |
| 3439 | else if (evaluation.to_int () >= eval_threshold) |
| 3440 | return true; |
| 3441 | /* If all call sites have known profile, we know we do not want to clone. |
| 3442 | If there are calls with unknown profile, try local heuristics. */ |
| 3443 | if (!called_without_ipa_profile) |
| 3444 | return false; |
| 3445 | } |
| 3446 | sreal evaluation = (time_benefit * freq_sum) / size_cost; |
| 3447 | evaluation = incorporate_penalties (node, info, evaluation); |
| 3448 | evaluation *= 1000; |
| 3449 | |
| 3450 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3451 | fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, " |
| 3452 | "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, " |
| 3453 | "threshold: %i\n" , |
| 3454 | time_benefit.to_double (), size_cost, freq_sum.to_double (), |
| 3455 | info->node_within_scc |
| 3456 | ? (info->node_is_self_scc ? ", self_scc" : ", scc" ) : "" , |
| 3457 | info->node_calling_single_call ? ", single_call" : "" , |
| 3458 | evaluation.to_double (), eval_threshold); |
| 3459 | |
| 3460 | return evaluation.to_int () >= eval_threshold; |
| 3461 | } |
| 3462 | |
| 3463 | /* Grow vectors in AVALS and fill them with information about values of |
| 3464 | parameters that are known to be independent of the context. Only calculate |
| 3465 | m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If |
| 3466 | REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable |
| 3467 | parameters will be stored in it. |
| 3468 | |
| 3469 | TODO: Also grow context independent value range vectors. */ |
| 3470 | |
| 3471 | static bool |
| 3472 | gather_context_independent_values (class ipa_node_params *info, |
| 3473 | ipa_auto_call_arg_values *avals, |
| 3474 | bool calculate_aggs, |
| 3475 | int *removable_params_cost) |
| 3476 | { |
| 3477 | int i, count = ipa_get_param_count (info); |
| 3478 | bool ret = false; |
| 3479 | |
| 3480 | avals->m_known_vals.safe_grow_cleared (len: count, exact: true); |
| 3481 | avals->m_known_contexts.safe_grow_cleared (len: count, exact: true); |
| 3482 | |
| 3483 | if (removable_params_cost) |
| 3484 | *removable_params_cost = 0; |
| 3485 | |
| 3486 | for (i = 0; i < count; i++) |
| 3487 | { |
| 3488 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 3489 | ipcp_lattice<tree> *lat = &plats->itself; |
| 3490 | |
| 3491 | if (lat->is_single_const ()) |
| 3492 | { |
| 3493 | ipcp_value<tree> *val = lat->values; |
| 3494 | gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); |
| 3495 | avals->m_known_vals[i] = val->value; |
| 3496 | if (removable_params_cost) |
| 3497 | *removable_params_cost |
| 3498 | += estimate_move_cost (TREE_TYPE (val->value), false); |
| 3499 | ret = true; |
| 3500 | } |
| 3501 | else if (removable_params_cost |
| 3502 | && !ipa_is_param_used (info, i)) |
| 3503 | *removable_params_cost |
| 3504 | += ipa_get_param_move_cost (info, i); |
| 3505 | |
| 3506 | if (!ipa_is_param_used (info, i)) |
| 3507 | continue; |
| 3508 | |
| 3509 | ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; |
| 3510 | /* Do not account known context as reason for cloning. We can see |
| 3511 | if it permits devirtualization. */ |
| 3512 | if (ctxlat->is_single_const ()) |
| 3513 | avals->m_known_contexts[i] = ctxlat->values->value; |
| 3514 | |
| 3515 | if (calculate_aggs) |
| 3516 | ret |= push_agg_values_from_plats (plats, dest_index: i, unit_delta: 0, res: &avals->m_known_aggs); |
| 3517 | } |
| 3518 | |
| 3519 | return ret; |
| 3520 | } |
| 3521 | |
| 3522 | /* Perform time and size measurement of NODE with the context given in AVALS, |
| 3523 | calculate the benefit compared to the node without specialization and store |
| 3524 | it into VAL. Take into account REMOVABLE_PARAMS_COST of all |
| 3525 | context-independent or unused removable parameters and EST_MOVE_COST, the |
| 3526 | estimated movement of the considered parameter. */ |
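| | /* For instance (all numbers invented): if the specialized body is estimated |
| | at time 90 against a non-specialized time of 100, devirtualization and hint |
| | bonuses add 12, and the removable-parameter and move costs are 3, the |
| | recorded local time benefit is (100 - 90) + 12 + 3 = 25; a zero size |
| | estimate is bumped to 1 so that later ratios never divide by zero. */ |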
| 3527 | |
| 3528 | static void |
| 3529 | perform_estimation_of_a_value (cgraph_node *node, |
| 3530 | ipa_auto_call_arg_values *avals, |
| 3531 | int removable_params_cost, int est_move_cost, |
| 3532 | ipcp_value_base *val) |
| 3533 | { |
| 3534 | sreal time_benefit; |
| 3535 | ipa_call_estimates estimates; |
| 3536 | |
| 3537 | estimate_ipcp_clone_size_and_time (node, avals, estimates: &estimates); |
| 3538 | |
| 3539 | /* Extern inline functions have no cloning local time benefits because they |
| 3540 | will be inlined anyway. The only reason to clone them is if it enables |
| 3541 | optimization in any of the functions they call. */ |
| 3542 | if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl)) |
| 3543 | time_benefit = 0; |
| 3544 | else |
| 3545 | time_benefit = (estimates.nonspecialized_time - estimates.time) |
| 3546 | + (devirtualization_time_bonus (node, avals) |
| 3547 | + hint_time_bonus (node, estimates) |
| 3548 | + removable_params_cost + est_move_cost); |
| 3549 | |
| 3550 | int size = estimates.size; |
| 3551 | gcc_checking_assert (size >=0); |
| 3552 | /* The inliner-heuristics based estimates may think that in certain |
| 3553 | contexts some functions do not have any size at all but we want |
| 3554 | all specializations to have at least a tiny cost, if nothing else |
| 3555 | to avoid dividing by zero. */ |
| 3556 | if (size == 0) |
| 3557 | size = 1; |
| 3558 | |
| 3559 | val->local_time_benefit = time_benefit; |
| 3560 | val->local_size_cost = size; |
| 3561 | } |
| 3562 | |
| 3563 | /* Get the overall limit of growth based on parameters extracted from NODE. |
| 3564 | It does not really make sense to mix functions with different overall growth |
| 3565 | limits, but it is possible, and if it happens, we do not want to select one |
| 3566 | limit at random. */ |
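| | /* A worked example with hypothetical values: with an original unit size of |
| | 1000, param_ipa_cp_large_unit_insns of 16000 and param_ipa_cp_unit_growth |
| | of 10, the base is first raised to the large-unit floor of 16000 and the |
| | returned limit is 16000 + 16000 * 10 / 100 + 1 = 17601. */ |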
| 3567 | |
| 3568 | static long |
| 3569 | get_max_overall_size (cgraph_node *node) |
| 3570 | { |
| 3571 | long max_new_size = orig_overall_size; |
| 3572 | long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns); |
| 3573 | if (max_new_size < large_unit) |
| 3574 | max_new_size = large_unit; |
| 3575 | int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth); |
| 3576 | max_new_size += max_new_size * unit_growth / 100 + 1; |
| 3577 | return max_new_size; |
| 3578 | } |
| 3579 | |
| 3580 | /* Return true if NODE should be cloned just for a parameter removal, possibly |
| 3581 | dumping a reason if not. */ |
| 3582 | |
| 3583 | static bool |
| 3584 | clone_for_param_removal_p (cgraph_node *node) |
| 3585 | { |
| 3586 | if (!node->can_change_signature) |
| 3587 | { |
| 3588 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3589 | fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, " |
| 3590 | "function cannot change signature.\n" ); |
| 3591 | return false; |
| 3592 | } |
| 3593 | if (node->can_be_local_p ()) |
| 3594 | { |
| 3595 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3596 | fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, " |
| 3597 | "IPA-SRA can do it potentially better.\n" ); |
| 3598 | return false; |
| 3599 | } |
| 3600 | return true; |
| 3601 | } |
| 3602 | |
| 3603 | /* Iterate over known values of parameters of NODE and estimate the local |
| 3604 | effects in terms of time and size they have. */ |
| 3605 | |
| 3606 | static void |
| 3607 | estimate_local_effects (struct cgraph_node *node) |
| 3608 | { |
| 3609 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 3610 | int count = ipa_get_param_count (info); |
| 3611 | bool always_const; |
| 3612 | int removable_params_cost; |
| 3613 | |
| 3614 | if (!count || !ipcp_versionable_function_p (node)) |
| 3615 | return; |
| 3616 | |
| 3617 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3618 | fprintf (stream: dump_file, format: "\nEstimating effects for %s.\n" , node->dump_name ()); |
| 3619 | |
| 3620 | ipa_auto_call_arg_values avals; |
| 3621 | always_const = gather_context_independent_values (info, avals: &avals, calculate_aggs: true, |
| 3622 | removable_params_cost: &removable_params_cost); |
| 3623 | int devirt_bonus = devirtualization_time_bonus (node, avals: &avals); |
| 3624 | if (always_const || devirt_bonus |
| 3625 | || (removable_params_cost && clone_for_param_removal_p (node))) |
| 3626 | { |
| 3627 | struct caller_statistics stats; |
| 3628 | ipa_call_estimates estimates; |
| 3629 | |
| 3630 | init_caller_stats (stats: &stats); |
| 3631 | node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats, |
| 3632 | include_overwritable: false); |
| 3633 | estimate_ipcp_clone_size_and_time (node, avals: &avals, estimates: &estimates); |
| 3634 | sreal time = estimates.nonspecialized_time - estimates.time; |
| 3635 | time += devirt_bonus; |
| 3636 | time += hint_time_bonus (node, estimates); |
| 3637 | time += removable_params_cost; |
| 3638 | int size = estimates.size - stats.n_calls * removable_params_cost; |
| 3639 | |
| 3640 | if (dump_file) |
| 3641 | fprintf (stream: dump_file, format: " - context independent values, size: %i, " |
| 3642 | "time_benefit: %f\n" , size, (time).to_double ()); |
| 3643 | |
| 3644 | if (size <= 0 || node->local) |
| 3645 | { |
| 3646 | info->do_clone_for_all_contexts = true; |
| 3647 | |
| 3648 | if (dump_file) |
| 3649 | fprintf (stream: dump_file, format: " Decided to specialize for all " |
| 3650 | "known contexts, code not going to grow.\n" ); |
| 3651 | } |
| 3652 | else if (good_cloning_opportunity_p (node, time_benefit: time, freq_sum: stats.freq_sum, |
| 3653 | count_sum: stats.count_sum, size_cost: size, |
| 3654 | called_without_ipa_profile: stats.called_without_ipa_profile)) |
| 3655 | { |
| 3656 | if (size + overall_size <= get_max_overall_size (node)) |
| 3657 | { |
| 3658 | info->do_clone_for_all_contexts = true; |
| 3659 | overall_size += size; |
| 3660 | |
| 3661 | if (dump_file) |
| 3662 | fprintf (stream: dump_file, format: " Decided to specialize for all " |
| 3663 | "known contexts, growth (to %li) deemed " |
| 3664 | "beneficial.\n" , overall_size); |
| 3665 | } |
| 3666 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3667 | fprintf (stream: dump_file, format: " Not cloning for all contexts because " |
| 3668 | "maximum unit size would be reached with %li.\n" , |
| 3669 | size + overall_size); |
| 3670 | } |
| 3671 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3672 | fprintf (stream: dump_file, format: " Not cloning for all contexts because " |
| 3673 | "!good_cloning_opportunity_p.\n" ); |
| 3674 | |
| 3675 | } |
| 3676 | |
| 3677 | for (int i = 0; i < count; i++) |
| 3678 | { |
| 3679 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 3680 | ipcp_lattice<tree> *lat = &plats->itself; |
| 3681 | ipcp_value<tree> *val; |
| 3682 | |
| 3683 | if (lat->bottom |
| 3684 | || !lat->values |
| 3685 | || avals.m_known_vals[i]) |
| 3686 | continue; |
| 3687 | |
| 3688 | for (val = lat->values; val; val = val->next) |
| 3689 | { |
| 3690 | gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); |
| 3691 | avals.m_known_vals[i] = val->value; |
| 3692 | |
| 3693 | int emc = estimate_move_cost (TREE_TYPE (val->value), true); |
| 3694 | perform_estimation_of_a_value (node, avals: &avals, removable_params_cost, |
| 3695 | est_move_cost: emc, val); |
| 3696 | |
| 3697 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3698 | { |
| 3699 | fprintf (stream: dump_file, format: " - estimates for value " ); |
| 3700 | print_ipcp_constant_value (f: dump_file, v: val->value); |
| 3701 | fprintf (stream: dump_file, format: " for " ); |
| 3702 | ipa_dump_param (dump_file, info, i); |
| 3703 | fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n" , |
| 3704 | val->local_time_benefit.to_double (), |
| 3705 | val->local_size_cost); |
| 3706 | } |
| 3707 | } |
| 3708 | avals.m_known_vals[i] = NULL_TREE; |
| 3709 | } |
| 3710 | |
| 3711 | for (int i = 0; i < count; i++) |
| 3712 | { |
| 3713 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 3714 | |
| 3715 | if (!plats->virt_call) |
| 3716 | continue; |
| 3717 | |
| 3718 | ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; |
| 3719 | ipcp_value<ipa_polymorphic_call_context> *val; |
| 3720 | |
| 3721 | if (ctxlat->bottom |
| 3722 | || !ctxlat->values |
| 3723 | || !avals.m_known_contexts[i].useless_p ()) |
| 3724 | continue; |
| 3725 | |
| 3726 | for (val = ctxlat->values; val; val = val->next) |
| 3727 | { |
| 3728 | avals.m_known_contexts[i] = val->value; |
| 3729 | perform_estimation_of_a_value (node, avals: &avals, removable_params_cost, |
| 3730 | est_move_cost: 0, val); |
| 3731 | |
| 3732 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3733 | { |
| 3734 | fprintf (stream: dump_file, format: " - estimates for polymorphic context " ); |
| 3735 | print_ipcp_constant_value (f: dump_file, v: val->value); |
| 3736 | fprintf (stream: dump_file, format: " for " ); |
| 3737 | ipa_dump_param (dump_file, info, i); |
| 3738 | fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n" , |
| 3739 | val->local_time_benefit.to_double (), |
| 3740 | val->local_size_cost); |
| 3741 | } |
| 3742 | } |
| 3743 | avals.m_known_contexts[i] = ipa_polymorphic_call_context (); |
| 3744 | } |
| 3745 | |
| 3746 | unsigned all_ctx_len = avals.m_known_aggs.length (); |
| 3747 | auto_vec<ipa_argagg_value, 32> all_ctx; |
| 3748 | all_ctx.reserve_exact (nelems: all_ctx_len); |
| 3749 | all_ctx.splice (src: avals.m_known_aggs); |
| 3750 | avals.m_known_aggs.safe_grow_cleared (len: all_ctx_len + 1); |
| 3751 | |
| 3752 | unsigned j = 0; |
| 3753 | for (int index = 0; index < count; index++) |
| 3754 | { |
| 3755 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i: index); |
| 3756 | |
| 3757 | if (plats->aggs_bottom || !plats->aggs) |
| 3758 | continue; |
| 3759 | |
| 3760 | for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) |
| 3761 | { |
| 3762 | ipcp_value<tree> *val; |
| 3763 | if (aglat->bottom || !aglat->values |
| 3764 | /* If the following is true, the one value is already part of all |
| 3765 | context estimations. */ |
| 3766 | || (!plats->aggs_contain_variable |
| 3767 | && aglat->is_single_const ())) |
| 3768 | continue; |
| 3769 | |
| 3770 | unsigned unit_offset = aglat->offset / BITS_PER_UNIT; |
| 3771 | while (j < all_ctx_len |
| 3772 | && (all_ctx[j].index < index |
| 3773 | || (all_ctx[j].index == index |
| 3774 | && all_ctx[j].unit_offset < unit_offset))) |
| 3775 | { |
| 3776 | avals.m_known_aggs[j] = all_ctx[j]; |
| 3777 | j++; |
| 3778 | } |
| 3779 | |
| 3780 | for (unsigned k = j; k < all_ctx_len; k++) |
| 3781 | avals.m_known_aggs[k+1] = all_ctx[k]; |
| 3782 | |
| 3783 | for (val = aglat->values; val; val = val->next) |
| 3784 | { |
| 3785 | avals.m_known_aggs[j].value = val->value; |
| 3786 | avals.m_known_aggs[j].unit_offset = unit_offset; |
| 3787 | avals.m_known_aggs[j].index = index; |
| 3788 | avals.m_known_aggs[j].by_ref = plats->aggs_by_ref; |
| 3789 | avals.m_known_aggs[j].killed = false; |
| 3790 | |
| 3791 | perform_estimation_of_a_value (node, avals: &avals, |
| 3792 | removable_params_cost, est_move_cost: 0, val); |
| 3793 | |
| 3794 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3795 | { |
| 3796 | fprintf (stream: dump_file, format: " - estimates for value " ); |
| 3797 | print_ipcp_constant_value (f: dump_file, v: val->value); |
| 3798 | fprintf (stream: dump_file, format: " for " ); |
| 3799 | ipa_dump_param (dump_file, info, i: index); |
| 3800 | fprintf (stream: dump_file, format: "[%soffset: " HOST_WIDE_INT_PRINT_DEC |
| 3801 | "]: time_benefit: %g, size: %i\n" , |
| 3802 | plats->aggs_by_ref ? "ref " : "" , |
| 3803 | aglat->offset, |
| 3804 | val->local_time_benefit.to_double (), |
| 3805 | val->local_size_cost); |
| 3806 | } |
| 3807 | } |
| 3808 | } |
| 3809 | } |
| 3810 | } |
| 3811 | |
| 3812 | |
| 3813 | /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the |
| 3814 | topological sort of values. */ |
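| | /* This is, in essence, Tarjan's strongly connected components algorithm run |
| | over the graph of value dependencies: dfs and low_link play their usual |
| | roles, each completed SCC is prepended to values_topo (its members linked |
| | through scc_next), and walking the list from the front therefore visits |
| | dependent values before the values they depend on. */ |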
| 3815 | |
| 3816 | template <typename valtype> |
| 3817 | void |
| 3818 | value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val) |
| 3819 | { |
| 3820 | ipcp_value_source<valtype> *src; |
| 3821 | |
| 3822 | if (cur_val->dfs) |
| 3823 | return; |
| 3824 | |
| 3825 | dfs_counter++; |
| 3826 | cur_val->dfs = dfs_counter; |
| 3827 | cur_val->low_link = dfs_counter; |
| 3828 | |
| 3829 | cur_val->topo_next = stack; |
| 3830 | stack = cur_val; |
| 3831 | cur_val->on_stack = true; |
| 3832 | |
| 3833 | for (src = cur_val->sources; src; src = src->next) |
| 3834 | if (src->val) |
| 3835 | { |
| 3836 | if (src->val->dfs == 0) |
| 3837 | { |
| 3838 | add_val (cur_val: src->val); |
| 3839 | if (src->val->low_link < cur_val->low_link) |
| 3840 | cur_val->low_link = src->val->low_link; |
| 3841 | } |
| 3842 | else if (src->val->on_stack |
| 3843 | && src->val->dfs < cur_val->low_link) |
| 3844 | cur_val->low_link = src->val->dfs; |
| 3845 | } |
| 3846 | |
| 3847 | if (cur_val->dfs == cur_val->low_link) |
| 3848 | { |
| 3849 | ipcp_value<valtype> *v, *scc_list = NULL; |
| 3850 | |
| 3851 | do |
| 3852 | { |
| 3853 | v = stack; |
| 3854 | stack = v->topo_next; |
| 3855 | v->on_stack = false; |
| 3856 | v->scc_no = cur_val->dfs; |
| 3857 | |
| 3858 | v->scc_next = scc_list; |
| 3859 | scc_list = v; |
| 3860 | } |
| 3861 | while (v != cur_val); |
| 3862 | |
| 3863 | cur_val->topo_next = values_topo; |
| 3864 | values_topo = cur_val; |
| 3865 | } |
| 3866 | } |
| 3867 | |
| 3868 | /* Add all values in lattices associated with NODE to the topological sort if |
| 3869 | they are not there yet. */ |
| 3870 | |
| 3871 | static void |
| 3872 | add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo) |
| 3873 | { |
| 3874 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 3875 | int i, count = ipa_get_param_count (info); |
| 3876 | |
| 3877 | for (i = 0; i < count; i++) |
| 3878 | { |
| 3879 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 3880 | ipcp_lattice<tree> *lat = &plats->itself; |
| 3881 | struct ipcp_agg_lattice *aglat; |
| 3882 | |
| 3883 | if (!lat->bottom) |
| 3884 | { |
| 3885 | ipcp_value<tree> *val; |
| 3886 | for (val = lat->values; val; val = val->next) |
| 3887 | topo->constants.add_val (cur_val: val); |
| 3888 | } |
| 3889 | |
| 3890 | if (!plats->aggs_bottom) |
| 3891 | for (aglat = plats->aggs; aglat; aglat = aglat->next) |
| 3892 | if (!aglat->bottom) |
| 3893 | { |
| 3894 | ipcp_value<tree> *val; |
| 3895 | for (val = aglat->values; val; val = val->next) |
| 3896 | topo->constants.add_val (cur_val: val); |
| 3897 | } |
| 3898 | |
| 3899 | ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; |
| 3900 | if (!ctxlat->bottom) |
| 3901 | { |
| 3902 | ipcp_value<ipa_polymorphic_call_context> *ctxval; |
| 3903 | for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next) |
| 3904 | topo->contexts.add_val (cur_val: ctxval); |
| 3905 | } |
| 3906 | } |
| 3907 | } |
| 3908 | |
| 3909 | /* One pass of constant propagation along the call graph edges, from callers |
| 3910 | to callees (requires topological ordering in TOPO), iterating over strongly |
| 3911 | connected components. */ |
| 3912 | |
| 3913 | static void |
| 3914 | propagate_constants_topo (class ipa_topo_info *topo) |
| 3915 | { |
| 3916 | int i; |
| 3917 | |
| 3918 | for (i = topo->nnodes - 1; i >= 0; i--) |
| 3919 | { |
| 3920 | unsigned j; |
| 3921 | struct cgraph_node *v, *node = topo->order[i]; |
| 3922 | vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node); |
| 3923 | |
| 3924 | /* First, iteratively propagate within the strongly connected component |
| 3925 | until all lattices stabilize. */ |
| 3926 | FOR_EACH_VEC_ELT (cycle_nodes, j, v) |
| 3927 | if (v->has_gimple_body_p ()) |
| 3928 | { |
| 3929 | if (opt_for_fn (v->decl, flag_ipa_cp) |
| 3930 | && opt_for_fn (v->decl, optimize)) |
| 3931 | push_node_to_stack (topo, node: v); |
| 3932 | /* When V is not optimized, we cannot push it to the stack, but |
| 3933 | we still need to set all its callees' lattices to bottom. */ |
| 3934 | else |
| 3935 | { |
| 3936 | for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee) |
| 3937 | propagate_constants_across_call (cs); |
| 3938 | } |
| 3939 | } |
| 3940 | |
| 3941 | v = pop_node_from_stack (topo); |
| 3942 | while (v) |
| 3943 | { |
| 3944 | struct cgraph_edge *cs; |
| 3945 | class ipa_node_params *info = NULL; |
| 3946 | bool self_scc = true; |
| 3947 | |
| 3948 | for (cs = v->callees; cs; cs = cs->next_callee) |
| 3949 | if (ipa_edge_within_scc (cs)) |
| 3950 | { |
| 3951 | cgraph_node *callee = cs->callee->function_symbol (); |
| 3952 | |
| 3953 | if (v != callee) |
| 3954 | self_scc = false; |
| 3955 | |
| 3956 | if (!info) |
| 3957 | { |
| 3958 | info = ipa_node_params_sum->get (node: v); |
| 3959 | info->node_within_scc = true; |
| 3960 | } |
| 3961 | |
| 3962 | if (propagate_constants_across_call (cs)) |
| 3963 | push_node_to_stack (topo, node: callee); |
| 3964 | } |
| 3965 | |
| 3966 | if (info) |
| 3967 | info->node_is_self_scc = self_scc; |
| 3968 | |
| 3969 | v = pop_node_from_stack (topo); |
| 3970 | } |
| 3971 | |
| 3972 | /* Afterwards, propagate along edges leading out of the SCC, calculate |
| 3973 | the local effects of the discovered constants and add all valid values |
| 3974 | to their topological sort. */ |
| 3975 | FOR_EACH_VEC_ELT (cycle_nodes, j, v) |
| 3976 | if (v->has_gimple_body_p () |
| 3977 | && opt_for_fn (v->decl, flag_ipa_cp) |
| 3978 | && opt_for_fn (v->decl, optimize)) |
| 3979 | { |
| 3980 | struct cgraph_edge *cs; |
| 3981 | |
| 3982 | estimate_local_effects (node: v); |
| 3983 | add_all_node_vals_to_toposort (node: v, topo); |
| 3984 | for (cs = v->callees; cs; cs = cs->next_callee) |
| 3985 | if (!ipa_edge_within_scc (cs)) |
| 3986 | propagate_constants_across_call (cs); |
| 3987 | } |
| 3988 | cycle_nodes.release (); |
| 3989 | } |
| 3990 | } |
| 3991 | |
| 3992 | /* Propagate the estimated effects of individual values along the topological |
| 3993 | order, from the dependent values to those they depend on. */ |
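| | /* Sketch of the aggregation with invented numbers: the local and already |
| | propagated time benefits and sizes of all members of an SCC are summed |
| | first; that sum is then pushed to each source value reached over an |
| | interesting edge, with the time contribution scaled by the frequency of the |
| | calling edge (and by a larger factor for same-SCC or self-recursion-generated |
| | values). E.g. an SCC whose accumulated time benefit is 10 adds 10 * 0.5 = 5 |
| | to a source reached over an edge with frequency 0.5 and no special factor. */ |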
| 3994 | |
| 3995 | template <typename valtype> |
| 3996 | void |
| 3997 | value_topo_info<valtype>::propagate_effects () |
| 3998 | { |
| 3999 | ipcp_value<valtype> *base; |
| 4000 | hash_set<ipcp_value<valtype> *> processed_srcvals; |
| 4001 | |
| 4002 | for (base = values_topo; base; base = base->topo_next) |
| 4003 | { |
| 4004 | ipcp_value_source<valtype> *src; |
| 4005 | ipcp_value<valtype> *val; |
| 4006 | sreal time = 0; |
| 4007 | HOST_WIDE_INT size = 0; |
| 4008 | |
| 4009 | for (val = base; val; val = val->scc_next) |
| 4010 | { |
| 4011 | time = time + val->local_time_benefit + val->prop_time_benefit; |
| 4012 | size = size + val->local_size_cost + val->prop_size_cost; |
| 4013 | } |
| 4014 | |
| 4015 | for (val = base; val; val = val->scc_next) |
| 4016 | { |
| 4017 | processed_srcvals.empty (); |
| 4018 | for (src = val->sources; src; src = src->next) |
| 4019 | if (src->val |
| 4020 | && cs_interesting_for_ipcp_p (src->cs)) |
| 4021 | { |
| 4022 | if (!processed_srcvals.add (src->val)) |
| 4023 | { |
| 4024 | HOST_WIDE_INT prop_size = size + src->val->prop_size_cost; |
| 4025 | if (prop_size < INT_MAX) |
| 4026 | src->val->prop_size_cost = prop_size; |
| 4027 | else |
| 4028 | continue; |
| 4029 | } |
| 4030 | |
| 4031 | int special_factor = 1; |
| 4032 | if (val->same_scc (src->val)) |
| 4033 | special_factor |
| 4034 | = opt_for_fn(src->cs->caller->decl, |
| 4035 | param_ipa_cp_recursive_freq_factor); |
| 4036 | else if (val->self_recursion_generated_p () |
| 4037 | && (src->cs->callee->function_symbol () |
| 4038 | == src->cs->caller)) |
| 4039 | { |
| 4040 | int max_recur_gen_depth |
| 4041 | = opt_for_fn(src->cs->caller->decl, |
| 4042 | param_ipa_cp_max_recursive_depth); |
| 4043 | special_factor = max_recur_gen_depth |
| 4044 | - val->self_recursion_generated_level + 1; |
| 4045 | } |
| 4046 | |
| 4047 | src->val->prop_time_benefit |
| 4048 | += time * special_factor * src->cs->sreal_frequency (); |
| 4049 | } |
| 4050 | |
| 4051 | if (size < INT_MAX) |
| 4052 | { |
| 4053 | val->prop_time_benefit = time; |
| 4054 | val->prop_size_cost = size; |
| 4055 | } |
| 4056 | else |
| 4057 | { |
| 4058 | val->prop_time_benefit = 0; |
| 4059 | val->prop_size_cost = 0; |
| 4060 | } |
| 4061 | } |
| 4062 | } |
| 4063 | } |
| 4064 | |
| 4065 | |
| 4066 | /* Propagate constants, polymorphic contexts and their effects from the |
| 4067 | summaries interprocedurally. */ |
| 4068 | |
| 4069 | static void |
| 4070 | ipcp_propagate_stage (class ipa_topo_info *topo) |
| 4071 | { |
| 4072 | struct cgraph_node *node; |
| 4073 | |
| 4074 | if (dump_file) |
| 4075 | fprintf (stream: dump_file, format: "\n Propagating constants:\n\n" ); |
| 4076 | |
| 4077 | FOR_EACH_DEFINED_FUNCTION (node) |
| 4078 | { |
| 4079 | if (node->has_gimple_body_p () |
| 4080 | && opt_for_fn (node->decl, flag_ipa_cp) |
| 4081 | && opt_for_fn (node->decl, optimize)) |
| 4082 | { |
| 4083 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 4084 | determine_versionability (node, info); |
| 4085 | |
| 4086 | unsigned nlattices = ipa_get_param_count (info); |
| 4087 | info->lattices.safe_grow_cleared (len: nlattices, exact: true); |
| 4088 | initialize_node_lattices (node); |
| 4089 | } |
| 4090 | ipa_size_summary *s = ipa_size_summaries->get (node); |
| 4091 | if (node->definition && !node->alias && s != NULL) |
| 4092 | overall_size += s->self_size; |
| 4093 | } |
| 4094 | |
| 4095 | orig_overall_size = overall_size; |
| 4096 | |
| 4097 | if (dump_file) |
| 4098 | fprintf (stream: dump_file, format: "\noverall_size: %li\n" , overall_size); |
| 4099 | |
| 4100 | propagate_constants_topo (topo); |
| 4101 | if (flag_checking) |
| 4102 | ipcp_verify_propagated_values (); |
| 4103 | topo->constants.propagate_effects (); |
| 4104 | topo->contexts.propagate_effects (); |
| 4105 | |
| 4106 | if (dump_file) |
| 4107 | { |
| 4108 | fprintf (stream: dump_file, format: "\nIPA lattices after all propagation:\n" ); |
| 4109 | print_all_lattices (f: dump_file, dump_sources: (dump_flags & TDF_DETAILS), dump_benefits: true); |
| 4110 | } |
| 4111 | } |
| 4112 | |
| 4113 | /* Discover which indirect outgoing edges of NODE, a new clone with known |
| 4114 | KNOWN_CSTS, KNOWN_CONTEXTS and aggregate values in AGGVALS, can be made direct and do so. */ |
| 4115 | |
| 4116 | static void |
| 4117 | ipcp_discover_new_direct_edges (struct cgraph_node *node, |
| 4118 | vec<tree> known_csts, |
| 4119 | vec<ipa_polymorphic_call_context> |
| 4120 | known_contexts, |
| 4121 | vec<ipa_argagg_value, va_gc> *aggvals) |
| 4122 | { |
| 4123 | struct cgraph_edge *ie, *next_ie; |
| 4124 | bool found = false; |
| 4125 | |
| 4126 | for (ie = node->indirect_calls; ie; ie = next_ie) |
| 4127 | { |
| 4128 | tree target; |
| 4129 | bool speculative; |
| 4130 | |
| 4131 | next_ie = ie->next_callee; |
| 4132 | ipa_argagg_value_list avs (aggvals); |
| 4133 | target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, |
| 4134 | avs, speculative: &speculative); |
| 4135 | if (target) |
| 4136 | { |
| 4137 | bool agg_contents = ie->indirect_info->agg_contents; |
| 4138 | bool polymorphic = ie->indirect_info->polymorphic; |
| 4139 | int param_index = ie->indirect_info->param_index; |
| 4140 | struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target, |
| 4141 | speculative); |
| 4142 | found = true; |
| 4143 | |
| 4144 | if (cs && !agg_contents && !polymorphic) |
| 4145 | { |
| 4146 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 4147 | int c = ipa_get_controlled_uses (info, i: param_index); |
| 4148 | if (c != IPA_UNDESCRIBED_USE |
| 4149 | && !ipa_get_param_load_dereferenced (info, i: param_index)) |
| 4150 | { |
| 4151 | struct ipa_ref *to_del; |
| 4152 | |
| 4153 | c--; |
| 4154 | ipa_set_controlled_uses (info, i: param_index, val: c); |
| 4155 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 4156 | fprintf (stream: dump_file, format: " controlled uses count of param " |
| 4157 | "%i bumped down to %i\n" , param_index, c); |
| 4158 | if (c == 0 |
| 4159 | && (to_del = node->find_reference (referred_node: cs->callee, NULL, lto_stmt_uid: 0, |
| 4160 | use_type: IPA_REF_ADDR))) |
| 4161 | { |
| 4162 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 4163 | fprintf (stream: dump_file, format: " and even removing its " |
| 4164 | "cloning-created reference\n" ); |
| 4165 | to_del->remove_reference (); |
| 4166 | } |
| 4167 | } |
| 4168 | } |
| 4169 | } |
| 4170 | } |
| 4171 | /* Turning indirect calls into direct calls will improve the overall summary. */ |
| 4172 | if (found) |
| 4173 | ipa_update_overall_fn_summary (node); |
| 4174 | } |
| 4175 | |
| 4176 | class edge_clone_summary; |
| 4177 | static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL; |
| 4178 | |
| 4179 | /* Edge clone summary. Links an edge into the doubly-linked list of all clones of the same original call graph edge. */ |
| 4180 | |
| 4181 | class edge_clone_summary |
| 4182 | { |
| 4183 | public: |
| 4184 | /* Default constructor. */ |
| 4185 | edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {} |
| 4186 | |
| 4187 | /* Default destructor. */ |
| 4188 | ~edge_clone_summary () |
| 4189 | { |
| 4190 | if (prev_clone) |
| 4191 | edge_clone_summaries->get (edge: prev_clone)->next_clone = next_clone; |
| 4192 | if (next_clone) |
| 4193 | edge_clone_summaries->get (edge: next_clone)->prev_clone = prev_clone; |
| 4194 | } |
| 4195 | |
| 4196 | cgraph_edge *prev_clone; |
| 4197 | cgraph_edge *next_clone; |
| 4198 | }; |
| 4199 | |
| 4200 | class edge_clone_summary_t: |
| 4201 | public call_summary <edge_clone_summary *> |
| 4202 | { |
| 4203 | public: |
| 4204 | edge_clone_summary_t (symbol_table *symtab): |
| 4205 | call_summary <edge_clone_summary *> (symtab) |
| 4206 | { |
| 4207 | m_initialize_when_cloning = true; |
| 4208 | } |
| 4209 | |
| 4210 | void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge, |
| 4211 | edge_clone_summary *src_data, |
| 4212 | edge_clone_summary *dst_data) final override; |
| 4213 | }; |
| 4214 | |
| 4215 | /* Edge duplication hook. */ |
| 4216 | |
| 4217 | void |
| 4218 | edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge, |
| 4219 | edge_clone_summary *src_data, |
| 4220 | edge_clone_summary *dst_data) |
| 4221 | { |
| 4222 | if (src_data->next_clone) |
| 4223 | edge_clone_summaries->get (edge: src_data->next_clone)->prev_clone = dst_edge; |
| 4224 | dst_data->prev_clone = src_edge; |
| 4225 | dst_data->next_clone = src_data->next_clone; |
| 4226 | src_data->next_clone = dst_edge; |
| 4227 | } |
| 4228 | |
| 4229 | /* Return true if CS calls DEST or its clone for all contexts. When |
| 4230 | ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive |
| 4231 | edges from/to an all-context clone. */ |
| 4232 | |
| 4233 | static bool |
| 4234 | calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest, |
| 4235 | bool allow_recursion_to_clone) |
| 4236 | { |
| 4237 | enum availability availability; |
| 4238 | cgraph_node *callee = cs->callee->function_symbol (avail: &availability); |
| 4239 | |
| 4240 | if (availability <= AVAIL_INTERPOSABLE) |
| 4241 | return false; |
| 4242 | if (callee == dest) |
| 4243 | return true; |
| 4244 | if (!allow_recursion_to_clone && cs->caller == callee) |
| 4245 | return false; |
| 4246 | |
| 4247 | ipa_node_params *info = ipa_node_params_sum->get (node: callee); |
| 4248 | return info->is_all_contexts_clone && info->ipcp_orig_node == dest; |
| 4249 | } |
| 4250 | |
| 4251 | /* Return true if edge CS does bring about the value described by SRC to |
| 4252 | DEST_VAL of node DEST or its clone for all contexts. */ |
| 4253 | |
| 4254 | static bool |
| 4255 | cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src, |
| 4256 | cgraph_node *dest, ipcp_value<tree> *dest_val) |
| 4257 | { |
| 4258 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 4259 | |
| 4260 | if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: !src->val) |
| 4261 | || caller_info->node_dead) |
| 4262 | return false; |
| 4263 | |
| 4264 | if (!src->val) |
| 4265 | return true; |
| 4266 | |
| 4267 | if (caller_info->ipcp_orig_node) |
| 4268 | { |
| 4269 | tree t = NULL_TREE; |
| 4270 | if (src->offset == -1) |
| 4271 | t = caller_info->known_csts[src->index]; |
| 4272 | else if (ipcp_transformation *ts |
| 4273 | = ipcp_get_transformation_summary (node: cs->caller)) |
| 4274 | { |
| 4275 | ipa_argagg_value_list avl (ts); |
| 4276 | t = avl.get_value (index: src->index, unit_offset: src->offset / BITS_PER_UNIT); |
| 4277 | } |
| 4278 | return (t != NULL_TREE |
| 4279 | && values_equal_for_ipcp_p (x: src->val->value, y: t)); |
| 4280 | } |
| 4281 | else |
| 4282 | { |
| 4283 | if (src->val == dest_val) |
| 4284 | return true; |
| 4285 | |
| 4286 | struct ipcp_agg_lattice *aglat; |
| 4287 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info, |
| 4288 | i: src->index); |
| 4289 | if (src->offset == -1) |
| 4290 | return (plats->itself.is_single_const () |
| 4291 | && values_equal_for_ipcp_p (x: src->val->value, |
| 4292 | y: plats->itself.values->value)); |
| 4293 | else |
| 4294 | { |
| 4295 | if (plats->aggs_bottom || plats->aggs_contain_variable) |
| 4296 | return false; |
| 4297 | for (aglat = plats->aggs; aglat; aglat = aglat->next) |
| 4298 | if (aglat->offset == src->offset) |
| 4299 | return (aglat->is_single_const () |
| 4300 | && values_equal_for_ipcp_p (x: src->val->value, |
| 4301 | y: aglat->values->value)); |
| 4302 | } |
| 4303 | return false; |
| 4304 | } |
| 4305 | } |
| 4306 | |
| 4307 | /* Return true if edge CS does bring about the value described by SRC to |
| 4308 | DST_VAL of node DEST or its clone for all contexts. */ |
| 4309 | |
| 4310 | static bool |
| 4311 | cgraph_edge_brings_value_p (cgraph_edge *cs, |
| 4312 | ipcp_value_source<ipa_polymorphic_call_context> *src, |
| 4313 | cgraph_node *dest, |
| 4314 | ipcp_value<ipa_polymorphic_call_context> *) |
| 4315 | { |
| 4316 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 4317 | |
| 4318 | if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: true) |
| 4319 | || caller_info->node_dead) |
| 4320 | return false; |
| 4321 | if (!src->val) |
| 4322 | return true; |
| 4323 | |
| 4324 | if (caller_info->ipcp_orig_node) |
| 4325 | return (caller_info->known_contexts.length () > (unsigned) src->index) |
| 4326 | && values_equal_for_ipcp_p (x: src->val->value, |
| 4327 | y: caller_info->known_contexts[src->index]); |
| 4328 | |
| 4329 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info, |
| 4330 | i: src->index); |
| 4331 | return plats->ctxlat.is_single_const () |
| 4332 | && values_equal_for_ipcp_p (x: src->val->value, |
| 4333 | y: plats->ctxlat.values->value); |
| 4334 | } |
| 4335 | |
| 4336 | /* Get the next clone in the linked list of clones of an edge. */ |
| 4337 | |
| 4338 | static inline struct cgraph_edge * |
| 4339 | get_next_cgraph_edge_clone (struct cgraph_edge *cs) |
| 4340 | { |
| 4341 | edge_clone_summary *s = edge_clone_summaries->get (edge: cs); |
| 4342 | return s != NULL ? s->next_clone : NULL; |
| 4343 | } |
| 4344 | |
| 4345 | /* Given VAL that is intended for DEST, iterate over all its sources and if any |
| 4346 | of them is viable and hot, return true. In that case, for those that still |
| 4347 | hold, add their edge frequency, their number, and the cumulative IPA profile |
| 4348 | counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT, |
| 4349 | *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively; also set *CALLED_WITHOUT_IPA_PROFILE if any such edge lacks IPA profile information. */ |
| 4350 | |
| 4351 | template <typename valtype> |
| 4352 | static bool |
| 4353 | get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, |
| 4354 | sreal *freq_sum, int *caller_count, |
| 4355 | profile_count *rec_count_sum, |
| 4356 | profile_count *nonrec_count_sum, |
| 4357 | bool *called_without_ipa_profile) |
| 4358 | { |
| 4359 | ipcp_value_source<valtype> *src; |
| 4360 | sreal freq = 0; |
| 4361 | int count = 0; |
| 4362 | profile_count rec_cnt = profile_count::zero (); |
| 4363 | profile_count nonrec_cnt = profile_count::zero (); |
| 4364 | bool interesting = false; |
| 4365 | bool non_self_recursive = false; |
| 4366 | *called_without_ipa_profile = false; |
| 4367 | |
| 4368 | for (src = val->sources; src; src = src->next) |
| 4369 | { |
| 4370 | struct cgraph_edge *cs = src->cs; |
| 4371 | while (cs) |
| 4372 | { |
| 4373 | if (cgraph_edge_brings_value_p (cs, src, dest, val)) |
| 4374 | { |
| 4375 | count++; |
| 4376 | freq += cs->sreal_frequency (); |
| 4377 | interesting |= cs_interesting_for_ipcp_p (e: cs); |
| 4378 | if (cs->caller != dest) |
| 4379 | { |
| 4380 | non_self_recursive = true; |
| 4381 | if (cs->count.ipa ().initialized_p ()) |
| 4382 | nonrec_cnt += cs->count.ipa (); |
| 4383 | else |
| 4384 | *called_without_ipa_profile = true; |
| 4385 | } |
| 4386 | else if (cs->count.ipa ().initialized_p ()) |
| 4387 | rec_cnt += cs->count.ipa (); |
| 4388 | else |
| 4389 | *called_without_ipa_profile = true; |
| 4390 | } |
| 4391 | cs = get_next_cgraph_edge_clone (cs); |
| 4392 | } |
| 4393 | } |
| 4394 | |
| 4395 | /* If the only edges bringing a value are self-recursive ones, do not bother |
| 4396 | evaluating it. */ |
| 4397 | if (!non_self_recursive) |
| 4398 | return false; |
| 4399 | |
| 4400 | *freq_sum = freq; |
| 4401 | *caller_count = count; |
| 4402 | *rec_count_sum = rec_cnt; |
| 4403 | *nonrec_count_sum = nonrec_cnt; |
| 4404 | |
| 4405 | return interesting; |
| 4406 | } |
| 4407 | |
/* Given a NODE and a set of its CALLERS, try to adjust the order of the
callers so that a non-self-recursive caller becomes the first element.  This
simplifies intersecting operations on values that arrive from all of these
callers, especially when there are self-recursive calls among them.  Return
true if this kind of adjustment is possible.  */
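/* An illustrative (hypothetical) situation: if CALLERS arrives as
{ NODE->NODE, A->NODE, B->NODE }, the leading self-recursive edge provides no
independently known values to seed an intersection with, so the function
swaps it with A->NODE to get { A->NODE, NODE->NODE, B->NODE } and returns
true.  If every edge in CALLERS originates in NODE itself, nothing is swapped
and false is returned.  */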
| 4413 | |
| 4414 | static bool |
| 4415 | adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers, |
| 4416 | cgraph_node *node) |
| 4417 | { |
| 4418 | for (unsigned i = 0; i < callers.length (); i++) |
| 4419 | { |
| 4420 | cgraph_edge *cs = callers[i]; |
| 4421 | |
| 4422 | if (cs->caller != node) |
| 4423 | { |
| 4424 | if (i > 0) |
| 4425 | { |
| 4426 | callers[i] = callers[0]; |
| 4427 | callers[0] = cs; |
| 4428 | } |
| 4429 | return true; |
| 4430 | } |
| 4431 | } |
| 4432 | return false; |
| 4433 | } |
| 4434 | |
| 4435 | /* Return a vector of incoming edges that do bring value VAL to node DEST. It |
| 4436 | is assumed their number is known and equal to CALLER_COUNT. */ |
| 4437 | |
| 4438 | template <typename valtype> |
| 4439 | static vec<cgraph_edge *> |
| 4440 | gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest, |
| 4441 | int caller_count) |
| 4442 | { |
| 4443 | ipcp_value_source<valtype> *src; |
| 4444 | vec<cgraph_edge *> ret; |
| 4445 | |
| 4446 | ret.create (nelems: caller_count); |
| 4447 | for (src = val->sources; src; src = src->next) |
| 4448 | { |
| 4449 | struct cgraph_edge *cs = src->cs; |
| 4450 | while (cs) |
| 4451 | { |
| 4452 | if (cgraph_edge_brings_value_p (cs, src, dest, val)) |
| 4453 | ret.quick_push (obj: cs); |
| 4454 | cs = get_next_cgraph_edge_clone (cs); |
| 4455 | } |
| 4456 | } |
| 4457 | |
| 4458 | if (caller_count > 1) |
| 4459 | adjust_callers_for_value_intersection (callers&: ret, node: dest); |
| 4460 | |
| 4461 | return ret; |
| 4462 | } |
| 4463 | |
/* Construct a replacement map for a known VALUE of the formal parameter with
number PARM_NUM of the function described by INFO.  Return it or NULL if for
some reason it cannot be created.  FORCE_LOAD_REF should be set to true when
the reference created for the constant should be a load one and not an
address one because the corresponding parameter p is only used as *p.  */
| 4469 | |
| 4470 | static struct ipa_replace_map * |
| 4471 | get_replacement_map (class ipa_node_params *info, tree value, int parm_num, |
| 4472 | bool force_load_ref) |
| 4473 | { |
| 4474 | struct ipa_replace_map *replace_map; |
| 4475 | |
| 4476 | replace_map = ggc_alloc<ipa_replace_map> (); |
| 4477 | if (dump_file) |
| 4478 | { |
| 4479 | fprintf (stream: dump_file, format: " replacing " ); |
| 4480 | ipa_dump_param (dump_file, info, i: parm_num); |
| 4481 | |
| 4482 | fprintf (stream: dump_file, format: " with const " ); |
| 4483 | print_generic_expr (dump_file, value); |
| 4484 | |
| 4485 | if (force_load_ref) |
| 4486 | fprintf (stream: dump_file, format: " - forcing load reference\n" ); |
| 4487 | else |
| 4488 | fprintf (stream: dump_file, format: "\n" ); |
| 4489 | } |
| 4490 | replace_map->parm_num = parm_num; |
| 4491 | replace_map->new_tree = value; |
| 4492 | replace_map->force_load_ref = force_load_ref; |
| 4493 | return replace_map; |
| 4494 | } |
| 4495 | |
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
one, otherwise it will be referred to as the original node.  */
| 4498 | |
| 4499 | static void |
| 4500 | dump_profile_updates (cgraph_node *node, bool spec) |
| 4501 | { |
| 4502 | if (spec) |
| 4503 | fprintf (stream: dump_file, format: " setting count of the specialized node %s to " , |
| 4504 | node->dump_name ()); |
| 4505 | else |
| 4506 | fprintf (stream: dump_file, format: " setting count of the original node %s to " , |
| 4507 | node->dump_name ()); |
| 4508 | |
| 4509 | node->count.dump (f: dump_file); |
| 4510 | fprintf (stream: dump_file, format: "\n" ); |
| 4511 | for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) |
| 4512 | { |
| 4513 | fprintf (stream: dump_file, format: " edge to %s has count " , |
| 4514 | cs->callee->dump_name ()); |
| 4515 | cs->count.dump (f: dump_file); |
| 4516 | fprintf (stream: dump_file, format: "\n" ); |
| 4517 | } |
| 4518 | } |
| 4519 | |
/* With a partial training run we do not want to assume that the original's
count is zero whenever we redirect all executed edges to the clone; simply
drop the profile to a local one in that case.  In any case, return the new
value.  ORIG_NODE is the original node and its count has not been updated
yet.  */
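/* For illustration (made-up numbers): if ORIG_NODE has an IPA count of 1000
but all executed edges were redirected to the clone so that REMAINDER is an
IPA-quality zero, then with -fprofile-partial-training that zero is not
trusted and ORIG_NODE's count converted to a guessed local one is returned
instead; without the flag, the zero REMAINDER is returned unchanged.  */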
| 4524 | |
| 4525 | profile_count |
| 4526 | lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node) |
| 4527 | { |
| 4528 | if (remainder.ipa_p () && !remainder.ipa ().nonzero_p () |
| 4529 | && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p () |
| 4530 | && opt_for_fn (orig_node->decl, flag_profile_partial_training)) |
| 4531 | remainder = orig_node->count.guessed_local (); |
| 4532 | |
| 4533 | return remainder; |
| 4534 | } |
| 4535 | |
| 4536 | /* Structure to sum counts coming from nodes other than the original node and |
| 4537 | its clones. */ |
| 4538 | |
| 4539 | struct gather_other_count_struct |
| 4540 | { |
| 4541 | cgraph_node *orig; |
| 4542 | profile_count other_count; |
| 4543 | }; |
| 4544 | |
/* Worker callback of call_for_symbol_thunks_and_aliases summing the profile
counts that come from non-self-recursive calls.  */
| 4547 | |
| 4548 | static bool |
| 4549 | gather_count_of_non_rec_edges (cgraph_node *node, void *data) |
| 4550 | { |
| 4551 | gather_other_count_struct *desc = (gather_other_count_struct *) data; |
| 4552 | for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| 4553 | if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig) |
| 4554 | desc->other_count += cs->count.ipa (); |
| 4555 | return false; |
| 4556 | } |
| 4557 | |
| 4558 | /* Structure to help analyze if we need to boost counts of some clones of some |
| 4559 | non-recursive edges to match the new callee count. */ |
| 4560 | |
| 4561 | struct desc_incoming_count_struct |
| 4562 | { |
| 4563 | cgraph_node *orig; |
| 4564 | hash_set <cgraph_edge *> *processed_edges; |
| 4565 | profile_count count; |
| 4566 | unsigned unproc_orig_rec_edges; |
| 4567 | }; |
| 4568 | |
| 4569 | /* Go over edges calling NODE and its thunks and gather information about |
| 4570 | incoming counts so that we know if we need to make any adjustments. */ |
| 4571 | |
| 4572 | static void |
| 4573 | analyze_clone_icoming_counts (cgraph_node *node, |
| 4574 | desc_incoming_count_struct *desc) |
| 4575 | { |
| 4576 | for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| 4577 | if (cs->caller->thunk) |
| 4578 | { |
| 4579 | analyze_clone_icoming_counts (node: cs->caller, desc); |
| 4580 | continue; |
| 4581 | } |
| 4582 | else |
| 4583 | { |
| 4584 | if (cs->count.initialized_p ()) |
| 4585 | desc->count += cs->count.ipa (); |
| 4586 | if (!desc->processed_edges->contains (k: cs) |
| 4587 | && cs->caller->clone_of == desc->orig) |
| 4588 | desc->unproc_orig_rec_edges++; |
| 4589 | } |
| 4590 | } |
| 4591 | |
/* If caller edge counts of a clone created for a self-recursive arithmetic
jump function must be adjusted because they come from the "seed" clone for
the first value and so have been excessively scaled back as if the call was
not recursive, adjust them so that the incoming counts of NODE match its
count.  NODE is the node or its thunk.  */
| 4597 | |
| 4598 | static void |
| 4599 | adjust_clone_incoming_counts (cgraph_node *node, |
| 4600 | desc_incoming_count_struct *desc) |
| 4601 | { |
| 4602 | for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| 4603 | if (cs->caller->thunk) |
| 4604 | { |
| 4605 | adjust_clone_incoming_counts (node: cs->caller, desc); |
| 4606 | profile_count sum = profile_count::zero (); |
| 4607 | for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller) |
| 4608 | if (e->count.initialized_p ()) |
| 4609 | sum += e->count.ipa (); |
| 4610 | cs->count = cs->count.combine_with_ipa_count (ipa: sum); |
| 4611 | } |
| 4612 | else if (!desc->processed_edges->contains (k: cs) |
| 4613 | && cs->caller->clone_of == desc->orig |
| 4614 | && cs->count.compatible_p (other: desc->count)) |
| 4615 | { |
| 4616 | cs->count += desc->count; |
| 4617 | if (dump_file) |
| 4618 | { |
| 4619 | fprintf (stream: dump_file, format: " Adjusted count of an incoming edge of " |
| 4620 | "a clone %s -> %s to " , cs->caller->dump_name (), |
| 4621 | cs->callee->dump_name ()); |
| 4622 | cs->count.dump (f: dump_file); |
| 4623 | fprintf (stream: dump_file, format: "\n" ); |
| 4624 | } |
| 4625 | } |
| 4626 | } |
| 4627 | |
/* When ORIG_NODE has been cloned for values which have been generated for a
self-recursive call as a result of arithmetic pass-through jump-functions,
adjust its count together with counts of all such clones in SELF_GEN_CLONES
which also at this point contains ORIG_NODE itself.

The function sums the counts of the original node and all its clones that
cannot be attributed to a specific clone because they come from
non-recursive edges.  This sum is then evenly divided between the clones and
on top of that each one gets all the counts which can be attributed directly
to it.  */
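/* A minimal made-up example: assume SELF_GEN_CLONES holds ORIG_NODE and two
clones, ORIG_NODE has an IPA count of 900, and edges from outside the family
directly account for 60 and 30 of the counts of the two clones.  The
redistributable sum is then 900 - 60 - 30 = 810, every node receives
810 / 3 = 270, and on top of that the clones keep their directly attributable
60 and 30, ending up with 330 and 300 respectively.  */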
| 4638 | |
| 4639 | static void |
| 4640 | update_counts_for_self_gen_clones (cgraph_node *orig_node, |
| 4641 | const vec<cgraph_node *> &self_gen_clones) |
| 4642 | { |
| 4643 | profile_count redist_sum = orig_node->count.ipa (); |
| 4644 | if (!redist_sum.nonzero_p ()) |
| 4645 | return; |
| 4646 | |
| 4647 | if (dump_file) |
| 4648 | fprintf (stream: dump_file, format: " Updating profile of self recursive clone " |
| 4649 | "series\n" ); |
| 4650 | |
| 4651 | gather_other_count_struct gocs; |
| 4652 | gocs.orig = orig_node; |
| 4653 | gocs.other_count = profile_count::zero (); |
| 4654 | |
| 4655 | auto_vec <profile_count, 8> other_edges_count; |
| 4656 | for (cgraph_node *n : self_gen_clones) |
| 4657 | { |
| 4658 | gocs.other_count = profile_count::zero (); |
| 4659 | n->call_for_symbol_thunks_and_aliases (callback: gather_count_of_non_rec_edges, |
| 4660 | data: &gocs, include_overwritable: false); |
| 4661 | other_edges_count.safe_push (obj: gocs.other_count); |
| 4662 | redist_sum -= gocs.other_count; |
| 4663 | } |
| 4664 | |
| 4665 | hash_set<cgraph_edge *> processed_edges; |
| 4666 | unsigned i = 0; |
| 4667 | for (cgraph_node *n : self_gen_clones) |
| 4668 | { |
| 4669 | profile_count new_count |
| 4670 | = (redist_sum / self_gen_clones.length () + other_edges_count[i]); |
| 4671 | new_count = lenient_count_portion_handling (remainder: new_count, orig_node); |
| 4672 | n->scale_profile_to (ipa_count: new_count); |
| 4673 | for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee) |
| 4674 | processed_edges.add (k: cs); |
| 4675 | |
| 4676 | i++; |
| 4677 | } |
| 4678 | |
/* There are still going to be edges to ORIG_NODE that have one or more
clones coming from another node clone in SELF_GEN_CLONES and which we
scaled by the same amount, which means that the total incoming sum of
counts to ORIG_NODE will be too high; scale such edges back.  */
| 4683 | for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee) |
| 4684 | { |
| 4685 | if (cs->callee->ultimate_alias_target () == orig_node) |
| 4686 | { |
| 4687 | unsigned den = 0; |
| 4688 | for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e)) |
| 4689 | if (e->callee->ultimate_alias_target () == orig_node |
| 4690 | && processed_edges.contains (k: e)) |
| 4691 | den++; |
| 4692 | if (den > 0) |
| 4693 | for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e)) |
| 4694 | if (e->callee->ultimate_alias_target () == orig_node |
| 4695 | && processed_edges.contains (k: e)) |
| 4696 | e->count /= den; |
| 4697 | } |
| 4698 | } |
| 4699 | |
/* Edges from the seeds of the values generated for arithmetic jump-functions
along self-recursive edges are likely to have a fairly low count and so edges
from them to nodes in the self_gen_clones do not correspond to the
artificially distributed count of the nodes; the total sum of incoming edges
to some clones might therefore be too low.  Detect this situation and
correct it.  */
| 4706 | for (cgraph_node *n : self_gen_clones) |
| 4707 | { |
| 4708 | if (!n->count.ipa ().nonzero_p ()) |
| 4709 | continue; |
| 4710 | |
| 4711 | desc_incoming_count_struct desc; |
| 4712 | desc.orig = orig_node; |
| 4713 | desc.processed_edges = &processed_edges; |
| 4714 | desc.count = profile_count::zero (); |
| 4715 | desc.unproc_orig_rec_edges = 0; |
| 4716 | analyze_clone_icoming_counts (node: n, desc: &desc); |
| 4717 | |
| 4718 | if (n->count.differs_from_p (other: desc.count)) |
| 4719 | { |
| 4720 | if (n->count > desc.count |
| 4721 | && desc.unproc_orig_rec_edges > 0) |
| 4722 | { |
| 4723 | desc.count = n->count - desc.count; |
desc.count /= desc.unproc_orig_rec_edges;
| 4725 | adjust_clone_incoming_counts (node: n, desc: &desc); |
| 4726 | } |
| 4727 | else if (dump_file) |
| 4728 | fprintf (stream: dump_file, |
| 4729 | format: " Unable to fix up incoming counts for %s.\n" , |
| 4730 | n->dump_name ()); |
| 4731 | } |
| 4732 | } |
| 4733 | |
| 4734 | if (dump_file) |
| 4735 | for (cgraph_node *n : self_gen_clones) |
| 4736 | dump_profile_updates (node: n, spec: n != orig_node); |
| 4737 | return; |
| 4738 | } |
| 4739 | |
| 4740 | /* After a specialized NEW_NODE version of ORIG_NODE has been created, update |
| 4741 | their profile information to reflect this. This function should not be used |
| 4742 | for clones generated for arithmetic pass-through jump functions on a |
self-recursive call graph edge; that situation is handled by
| 4744 | update_counts_for_self_gen_clones. */ |
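/* In the simplest case (made-up numbers, no self-recursion and a healthy
profile): if ORIG_NODE has an IPA count of 1000 and the callers that were
redirected to NEW_NODE account for 400 of it, NEW_NODE is scaled to 400 and
ORIG_NODE to the remaining 600 (subject to lenient_count_portion_handling).
The code below mostly deals with the harder cases: sums that exceed the
original count because the profile has gone astray, and self-recursive edges
whose counts have to be split between the two nodes.  */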
| 4745 | |
| 4746 | static void |
| 4747 | update_profiling_info (struct cgraph_node *orig_node, |
| 4748 | struct cgraph_node *new_node) |
| 4749 | { |
| 4750 | struct caller_statistics stats; |
| 4751 | profile_count new_sum; |
| 4752 | profile_count remainder, orig_node_count = orig_node->count.ipa (); |
| 4753 | |
| 4754 | if (!orig_node_count.nonzero_p ()) |
| 4755 | return; |
| 4756 | |
| 4757 | if (dump_file) |
| 4758 | { |
| 4759 | fprintf (stream: dump_file, format: " Updating profile from original count: " ); |
| 4760 | orig_node_count.dump (f: dump_file); |
| 4761 | fprintf (stream: dump_file, format: "\n" ); |
| 4762 | } |
| 4763 | |
| 4764 | init_caller_stats (stats: &stats, itself: new_node); |
| 4765 | new_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats, |
| 4766 | include_overwritable: false); |
| 4767 | new_sum = stats.count_sum; |
| 4768 | |
| 4769 | bool orig_edges_processed = false; |
| 4770 | if (new_sum > orig_node_count) |
| 4771 | { |
/* Profile has already gone astray; keep what we have but lower it
to global0 adjusted, or to local if we have partial training.  */
| 4774 | if (opt_for_fn (orig_node->decl, flag_profile_partial_training)) |
| 4775 | orig_node->make_profile_local (); |
| 4776 | else |
| 4777 | orig_node->make_profile_global0 (quality: GUESSED_GLOBAL0_ADJUSTED); |
| 4778 | orig_edges_processed = true; |
| 4779 | } |
| 4780 | else if (stats.rec_count_sum.nonzero_p ()) |
| 4781 | { |
| 4782 | int new_nonrec_calls = stats.n_nonrec_calls; |
| 4783 | /* There are self-recursive edges which are likely to bring in the |
| 4784 | majority of calls but which we must divide in between the original and |
| 4785 | new node. */ |
| 4786 | init_caller_stats (stats: &stats, itself: orig_node); |
| 4787 | orig_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, |
| 4788 | data: &stats, include_overwritable: false); |
| 4789 | int orig_nonrec_calls = stats.n_nonrec_calls; |
| 4790 | profile_count orig_nonrec_call_count = stats.count_sum; |
| 4791 | |
| 4792 | if (orig_node->local) |
| 4793 | { |
| 4794 | if (!orig_nonrec_call_count.nonzero_p ()) |
| 4795 | { |
| 4796 | if (dump_file) |
| 4797 | fprintf (stream: dump_file, format: " The original is local and the only " |
| 4798 | "incoming edges from non-dead callers with nonzero " |
| 4799 | "counts are self-recursive, assuming it is cold.\n" ); |
| 4800 | /* The NEW_NODE count and counts of all its outgoing edges |
| 4801 | are still unmodified copies of ORIG_NODE's. Just clear |
| 4802 | the latter and bail out. */ |
| 4803 | if (opt_for_fn (orig_node->decl, flag_profile_partial_training)) |
| 4804 | orig_node->make_profile_local (); |
| 4805 | else |
| 4806 | orig_node->make_profile_global0 (quality: GUESSED_GLOBAL0_ADJUSTED); |
| 4807 | return; |
| 4808 | } |
| 4809 | } |
| 4810 | else |
| 4811 | { |
| 4812 | /* Let's behave as if there was another caller that accounts for all |
| 4813 | the calls that were either indirect or from other compilation |
| 4814 | units. */ |
| 4815 | orig_nonrec_calls++; |
| 4816 | profile_count pretend_caller_count |
| 4817 | = (orig_node_count - new_sum - orig_nonrec_call_count |
| 4818 | - stats.rec_count_sum); |
| 4819 | orig_nonrec_call_count += pretend_caller_count; |
| 4820 | } |
| 4821 | |
/* Divide all "unexplained" counts roughly proportionally to sums of
counts of non-recursive calls.

We put rather arbitrary limits on how many counts we claim because the
sum of non-self-recursive incoming counts is only a rough guideline and
there are cases (such as mcf) where using it blindly just takes too
many.  And if lattices are considered in the opposite order we could
also take too few.  */
| 4830 | profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count; |
| 4831 | |
| 4832 | int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls); |
| 4833 | profile_count new_part |
= MAX (MIN (unexp.apply_scale (new_sum,
| 4835 | new_sum + orig_nonrec_call_count), |
| 4836 | unexp.apply_scale (limit_den - 1, limit_den)), |
| 4837 | unexp.apply_scale (new_nonrec_calls, limit_den)); |
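/* Worked example with made-up numbers: with unexp = 600, new_sum = 300,
orig_nonrec_call_count = 300, new_nonrec_calls = 1 and orig_nonrec_calls = 3,
limit_den is 8; the proportional share is 600 * 300 / 600 = 300, the upper
bound is 600 * 7 / 8 = 525 and the lower bound is 600 * 1 / 8 = 75, so
new_part becomes 300.  */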
| 4838 | if (dump_file) |
| 4839 | { |
| 4840 | fprintf (stream: dump_file, format: " Claiming " ); |
| 4841 | new_part.dump (f: dump_file); |
| 4842 | fprintf (stream: dump_file, format: " of unexplained " ); |
| 4843 | unexp.dump (f: dump_file); |
| 4844 | fprintf (stream: dump_file, format: " counts because of self-recursive " |
| 4845 | "calls\n" ); |
| 4846 | } |
| 4847 | new_sum += new_part; |
| 4848 | remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum, |
| 4849 | orig_node); |
| 4850 | } |
| 4851 | else |
| 4852 | remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum, |
| 4853 | orig_node); |
| 4854 | |
| 4855 | new_node->scale_profile_to (ipa_count: new_sum); |
| 4856 | |
| 4857 | if (!orig_edges_processed) |
| 4858 | orig_node->scale_profile_to (ipa_count: remainder); |
| 4859 | |
| 4860 | if (dump_file) |
| 4861 | { |
| 4862 | dump_profile_updates (node: new_node, spec: true); |
| 4863 | dump_profile_updates (node: orig_node, spec: false); |
| 4864 | } |
| 4865 | } |
| 4866 | |
| 4867 | /* Update the respective profile of specialized NEW_NODE and the original |
| 4868 | ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM |
| 4869 | have been redirected to the specialized version. */ |
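/* For example (made-up counts): if edges with a combined IPA count of 100
have just been redirected from ORIG_NODE to NEW_NODE, ORIG_NODE is scaled
down by 100 (subject to lenient_count_portion_handling) and NEW_NODE is
scaled up by 100.  */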
| 4870 | |
| 4871 | static void |
| 4872 | update_specialized_profile (struct cgraph_node *new_node, |
| 4873 | struct cgraph_node *orig_node, |
| 4874 | profile_count redirected_sum) |
| 4875 | { |
| 4876 | if (dump_file) |
| 4877 | { |
| 4878 | fprintf (stream: dump_file, format: " the sum of counts of redirected edges is " ); |
| 4879 | redirected_sum.dump (f: dump_file); |
| 4880 | fprintf (stream: dump_file, format: "\n old ipa count of the original node is " ); |
| 4881 | orig_node->count.dump (f: dump_file); |
| 4882 | fprintf (stream: dump_file, format: "\n" ); |
| 4883 | } |
| 4884 | if (!orig_node->count.ipa ().nonzero_p () |
| 4885 | || !redirected_sum.nonzero_p ()) |
| 4886 | return; |
| 4887 | |
| 4888 | orig_node->scale_profile_to |
| 4889 | (ipa_count: lenient_count_portion_handling (remainder: orig_node->count.ipa () - redirected_sum, |
| 4890 | orig_node)); |
| 4891 | |
| 4892 | new_node->scale_profile_to (ipa_count: new_node->count.ipa () + redirected_sum); |
| 4893 | |
| 4894 | if (dump_file) |
| 4895 | { |
| 4896 | dump_profile_updates (node: new_node, spec: true); |
| 4897 | dump_profile_updates (node: orig_node, spec: false); |
| 4898 | } |
| 4899 | } |
| 4900 | |
| 4901 | static void adjust_references_in_caller (cgraph_edge *cs, |
| 4902 | symtab_node *symbol, int index); |
| 4903 | |
| 4904 | /* Simple structure to pass a symbol and index (with same meaning as parameters |
| 4905 | of adjust_references_in_caller) through a void* parameter of a |
| 4906 | call_for_symbol_thunks_and_aliases callback. */ |
| 4907 | struct symbol_and_index_together |
| 4908 | { |
| 4909 | symtab_node *symbol; |
| 4910 | int index; |
| 4911 | }; |
| 4912 | |
| 4913 | /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call |
| 4914 | adjust_references_in_caller on edges up in the call-graph, if necessary. */ |
| 4915 | static bool |
| 4916 | adjust_refs_in_act_callers (struct cgraph_node *node, void *data) |
| 4917 | { |
| 4918 | symbol_and_index_together *pack = (symbol_and_index_together *) data; |
| 4919 | for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| 4920 | if (!cs->caller->thunk) |
| 4921 | adjust_references_in_caller (cs, symbol: pack->symbol, index: pack->index); |
| 4922 | return false; |
| 4923 | } |
| 4924 | |
/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
variable which is only dereferenced and which is represented by SYMBOL.  See
if we can remove the ADDR reference in callers associated with the call.  */
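/* An illustrative (hypothetical) example:

static int v;
void g (void) { f (&v); }

where the parameter of f at INDEX is only ever dereferenced.  Once &v has
been propagated into a specialization of f, the IPA_REF_ADDR reference that
g holds on v is no longer needed for this call and can either be removed or,
when the location is still read through the parameter, be downgraded to an
IPA_REF_LOAD reference.  */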
| 4928 | |
| 4929 | static void |
| 4930 | adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index) |
| 4931 | { |
| 4932 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 4933 | ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i: index); |
| 4934 | if (jfunc->type == IPA_JF_CONST) |
| 4935 | { |
| 4936 | ipa_ref *to_del = cs->caller->find_reference (referred_node: symbol, stmt: cs->call_stmt, |
| 4937 | lto_stmt_uid: cs->lto_stmt_uid, |
| 4938 | use_type: IPA_REF_ADDR); |
| 4939 | if (!to_del) |
| 4940 | return; |
| 4941 | to_del->remove_reference (); |
| 4942 | ipa_zap_jf_refdesc (jfunc); |
| 4943 | if (dump_file) |
| 4944 | fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n" , |
| 4945 | cs->caller->dump_name (), symbol->dump_name ()); |
| 4946 | return; |
| 4947 | } |
| 4948 | |
| 4949 | if (jfunc->type != IPA_JF_PASS_THROUGH |
| 4950 | || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR |
| 4951 | || ipa_get_jf_pass_through_refdesc_decremented (jfunc)) |
| 4952 | return; |
| 4953 | |
| 4954 | int fidx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 4955 | cgraph_node *caller = cs->caller; |
| 4956 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: caller); |
| 4957 | /* TODO: This consistency check may be too big and not really |
| 4958 | that useful. Consider removing it. */ |
| 4959 | tree cst; |
| 4960 | if (caller_info->ipcp_orig_node) |
| 4961 | cst = caller_info->known_csts[fidx]; |
| 4962 | else |
| 4963 | { |
| 4964 | ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info: caller_info, i: fidx); |
| 4965 | gcc_assert (lat->is_single_const ()); |
| 4966 | cst = lat->values->value; |
| 4967 | } |
| 4968 | gcc_assert (TREE_CODE (cst) == ADDR_EXPR |
| 4969 | && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0))) |
| 4970 | == symbol)); |
| 4971 | |
| 4972 | int cuses = ipa_get_controlled_uses (info: caller_info, i: fidx); |
| 4973 | if (cuses == IPA_UNDESCRIBED_USE) |
| 4974 | return; |
| 4975 | gcc_assert (cuses > 0); |
| 4976 | cuses--; |
| 4977 | ipa_set_controlled_uses (info: caller_info, i: fidx, val: cuses); |
| 4978 | ipa_set_jf_pass_through_refdesc_decremented (jfunc, value: true); |
| 4979 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 4980 | fprintf (stream: dump_file, format: " Controlled uses of parameter %i of %s dropped " |
| 4981 | "to %i.\n" , fidx, caller->dump_name (), cuses); |
| 4982 | if (cuses) |
| 4983 | return; |
| 4984 | |
| 4985 | if (caller_info->ipcp_orig_node) |
| 4986 | { |
| 4987 | /* Cloning machinery has created a reference here, we need to either |
| 4988 | remove it or change it to a read one. */ |
| 4989 | ipa_ref *to_del = caller->find_reference (referred_node: symbol, NULL, lto_stmt_uid: 0, use_type: IPA_REF_ADDR); |
| 4990 | if (to_del) |
| 4991 | { |
| 4992 | to_del->remove_reference (); |
| 4993 | if (dump_file) |
| 4994 | fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n" , |
| 4995 | cs->caller->dump_name (), symbol->dump_name ()); |
| 4996 | if (ipa_get_param_load_dereferenced (info: caller_info, i: fidx)) |
| 4997 | { |
| 4998 | caller->create_reference (referred_node: symbol, use_type: IPA_REF_LOAD, NULL); |
| 4999 | if (dump_file) |
| 5000 | fprintf (stream: dump_file, |
| 5001 | format: " ...and replaced it with LOAD one.\n" ); |
| 5002 | } |
| 5003 | } |
| 5004 | } |
| 5005 | |
| 5006 | symbol_and_index_together pack; |
| 5007 | pack.symbol = symbol; |
| 5008 | pack.index = fidx; |
| 5009 | if (caller->can_change_signature) |
| 5010 | caller->call_for_symbol_thunks_and_aliases (callback: adjust_refs_in_act_callers, |
| 5011 | data: &pack, include_overwritable: true); |
| 5012 | } |
| 5013 | |
| 5014 | |
| 5015 | /* Return true if we would like to remove a parameter from NODE when cloning it |
| 5016 | with KNOWN_CSTS scalar constants. */ |
| 5017 | |
| 5018 | static bool |
| 5019 | want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts) |
| 5020 | { |
| 5021 | auto_vec<bool, 16> surviving; |
| 5022 | bool filled_vec = false; |
| 5023 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 5024 | int i, count = ipa_get_param_count (info); |
| 5025 | |
| 5026 | for (i = 0; i < count; i++) |
| 5027 | { |
| 5028 | if (!known_csts[i] && ipa_is_param_used (info, i)) |
| 5029 | continue; |
| 5030 | |
| 5031 | if (!filled_vec) |
| 5032 | { |
| 5033 | clone_info *info = clone_info::get (node); |
| 5034 | if (!info || !info->param_adjustments) |
| 5035 | return true; |
| 5036 | info->param_adjustments->get_surviving_params (surviving_params: &surviving); |
| 5037 | filled_vec = true; |
| 5038 | } |
if ((unsigned) i < surviving.length () && surviving[i])
| 5040 | return true; |
| 5041 | } |
| 5042 | return false; |
| 5043 | } |
| 5044 | |
| 5045 | /* Create a specialized version of NODE with known constants in KNOWN_CSTS, |
| 5046 | known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and |
| 5047 | redirect all edges in CALLERS to it. */ |
| 5048 | |
| 5049 | static struct cgraph_node * |
| 5050 | create_specialized_node (struct cgraph_node *node, |
| 5051 | vec<tree> known_csts, |
| 5052 | vec<ipa_polymorphic_call_context> known_contexts, |
| 5053 | vec<ipa_argagg_value, va_gc> *aggvals, |
| 5054 | vec<cgraph_edge *> &callers) |
| 5055 | { |
| 5056 | ipa_node_params *new_info, *info = ipa_node_params_sum->get (node); |
| 5057 | vec<ipa_replace_map *, va_gc> *replace_trees = NULL; |
| 5058 | vec<ipa_adjusted_param, va_gc> *new_params = NULL; |
| 5059 | struct cgraph_node *new_node; |
| 5060 | int i, count = ipa_get_param_count (info); |
| 5061 | clone_info *cinfo = clone_info::get (node); |
| 5062 | ipa_param_adjustments *old_adjustments = cinfo |
| 5063 | ? cinfo->param_adjustments : NULL; |
| 5064 | ipa_param_adjustments *new_adjustments; |
| 5065 | gcc_assert (!info->ipcp_orig_node); |
| 5066 | gcc_assert (node->can_change_signature |
| 5067 | || !old_adjustments); |
| 5068 | |
| 5069 | if (old_adjustments) |
| 5070 | { |
| 5071 | /* At the moment all IPA optimizations should use the number of |
| 5072 | parameters of the prevailing decl as the m_always_copy_start. |
| 5073 | Handling any other value would complicate the code below, so for the |
time being let's only assert it is so.  */
| 5075 | gcc_assert (old_adjustments->m_always_copy_start == count |
| 5076 | || old_adjustments->m_always_copy_start < 0); |
| 5077 | int old_adj_count = vec_safe_length (v: old_adjustments->m_adj_params); |
| 5078 | for (i = 0; i < old_adj_count; i++) |
| 5079 | { |
| 5080 | ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i]; |
| 5081 | if (!node->can_change_signature |
| 5082 | || old_adj->op != IPA_PARAM_OP_COPY |
| 5083 | || (!known_csts[old_adj->base_index] |
| 5084 | && ipa_is_param_used (info, i: old_adj->base_index))) |
| 5085 | { |
| 5086 | ipa_adjusted_param new_adj = *old_adj; |
| 5087 | |
| 5088 | new_adj.prev_clone_adjustment = true; |
| 5089 | new_adj.prev_clone_index = i; |
| 5090 | vec_safe_push (v&: new_params, obj: new_adj); |
| 5091 | } |
| 5092 | } |
| 5093 | bool skip_return = old_adjustments->m_skip_return; |
| 5094 | new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ()) |
| 5095 | ipa_param_adjustments (new_params, count, |
| 5096 | skip_return)); |
| 5097 | } |
| 5098 | else if (node->can_change_signature |
| 5099 | && want_remove_some_param_p (node, known_csts)) |
| 5100 | { |
| 5101 | ipa_adjusted_param adj; |
| 5102 | memset (s: &adj, c: 0, n: sizeof (adj)); |
| 5103 | adj.op = IPA_PARAM_OP_COPY; |
| 5104 | for (i = 0; i < count; i++) |
| 5105 | if (!known_csts[i] && ipa_is_param_used (info, i)) |
| 5106 | { |
| 5107 | adj.base_index = i; |
| 5108 | adj.prev_clone_index = i; |
| 5109 | vec_safe_push (v&: new_params, obj: adj); |
| 5110 | } |
| 5111 | new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ()) |
| 5112 | ipa_param_adjustments (new_params, count, false)); |
| 5113 | } |
| 5114 | else |
| 5115 | new_adjustments = NULL; |
| 5116 | |
| 5117 | auto_vec<cgraph_edge *, 2> self_recursive_calls; |
| 5118 | for (i = callers.length () - 1; i >= 0; i--) |
| 5119 | { |
| 5120 | cgraph_edge *cs = callers[i]; |
| 5121 | if (cs->caller == node) |
| 5122 | { |
| 5123 | self_recursive_calls.safe_push (obj: cs); |
| 5124 | callers.unordered_remove (ix: i); |
| 5125 | } |
| 5126 | } |
| 5127 | replace_trees = cinfo ? vec_safe_copy (src: cinfo->tree_map) : NULL; |
| 5128 | for (i = 0; i < count; i++) |
| 5129 | { |
| 5130 | tree t = known_csts[i]; |
| 5131 | if (!t) |
| 5132 | continue; |
| 5133 | |
| 5134 | gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); |
| 5135 | |
| 5136 | bool load_ref = false; |
| 5137 | symtab_node *ref_symbol; |
| 5138 | if (TREE_CODE (t) == ADDR_EXPR) |
| 5139 | { |
| 5140 | tree base = get_base_address (TREE_OPERAND (t, 0)); |
| 5141 | if (TREE_CODE (base) == VAR_DECL |
| 5142 | && ipa_get_controlled_uses (info, i) == 0 |
| 5143 | && ipa_get_param_load_dereferenced (info, i) |
| 5144 | && (ref_symbol = symtab_node::get (decl: base))) |
| 5145 | { |
| 5146 | load_ref = true; |
| 5147 | if (node->can_change_signature) |
| 5148 | for (cgraph_edge *caller : callers) |
| 5149 | adjust_references_in_caller (cs: caller, symbol: ref_symbol, index: i); |
| 5150 | } |
| 5151 | } |
| 5152 | |
| 5153 | ipa_replace_map *replace_map = get_replacement_map (info, value: t, parm_num: i, force_load_ref: load_ref); |
| 5154 | if (replace_map) |
| 5155 | vec_safe_push (v&: replace_trees, obj: replace_map); |
| 5156 | } |
| 5157 | |
| 5158 | unsigned &suffix_counter = clone_num_suffixes->get_or_insert ( |
| 5159 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME ( |
| 5160 | node->decl))); |
| 5161 | new_node = node->create_virtual_clone (redirect_callers: callers, tree_map: replace_trees, |
| 5162 | param_adjustments: new_adjustments, suffix: "constprop" , |
| 5163 | num_suffix: suffix_counter); |
| 5164 | suffix_counter++; |
| 5165 | |
| 5166 | bool have_self_recursive_calls = !self_recursive_calls.is_empty (); |
| 5167 | for (unsigned j = 0; j < self_recursive_calls.length (); j++) |
| 5168 | { |
| 5169 | cgraph_edge *cs = get_next_cgraph_edge_clone (cs: self_recursive_calls[j]); |
| 5170 | /* Cloned edges can disappear during cloning as speculation can be |
resolved; check that we have one and that it comes from the last
| 5172 | cloning. */ |
| 5173 | if (cs && cs->caller == new_node) |
| 5174 | cs->redirect_callee_duplicating_thunks (n: new_node); |
| 5175 | /* Any future code that would make more than one clone of an outgoing |
edge would confuse this mechanism, so let's check that this does not
| 5177 | happen. */ |
| 5178 | gcc_checking_assert (!cs |
| 5179 | || !get_next_cgraph_edge_clone (cs) |
| 5180 | || get_next_cgraph_edge_clone (cs)->caller != new_node); |
| 5181 | } |
| 5182 | if (have_self_recursive_calls) |
| 5183 | new_node->expand_all_artificial_thunks (); |
| 5184 | |
| 5185 | ipa_set_node_agg_value_chain (node: new_node, aggs: aggvals); |
| 5186 | for (const ipa_argagg_value &av : aggvals) |
| 5187 | new_node->maybe_create_reference (val: av.value, NULL); |
| 5188 | |
| 5189 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 5190 | { |
| 5191 | fprintf (stream: dump_file, format: " the new node is %s.\n" , new_node->dump_name ()); |
| 5192 | if (known_contexts.exists ()) |
| 5193 | { |
| 5194 | for (i = 0; i < count; i++) |
| 5195 | if (!known_contexts[i].useless_p ()) |
| 5196 | { |
| 5197 | fprintf (stream: dump_file, format: " known ctx %i is " , i); |
| 5198 | known_contexts[i].dump (f: dump_file); |
| 5199 | } |
| 5200 | } |
| 5201 | if (aggvals) |
| 5202 | { |
| 5203 | fprintf (stream: dump_file, format: " Aggregate replacements:" ); |
| 5204 | ipa_argagg_value_list avs (aggvals); |
| 5205 | avs.dump (f: dump_file); |
| 5206 | } |
| 5207 | } |
| 5208 | |
| 5209 | new_info = ipa_node_params_sum->get (node: new_node); |
| 5210 | new_info->ipcp_orig_node = node; |
| 5211 | new_node->ipcp_clone = true; |
| 5212 | new_info->known_csts = known_csts; |
| 5213 | new_info->known_contexts = known_contexts; |
| 5214 | |
| 5215 | ipcp_discover_new_direct_edges (node: new_node, known_csts, known_contexts, |
| 5216 | aggvals); |
| 5217 | |
| 5218 | return new_node; |
| 5219 | } |
| 5220 | |
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
pass-through function to itself when the cgraph_node involved is not an
IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
no-operation pass-through.  */
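/* For instance (hypothetical function), in

fn (int i)
{
fn (i);
}

the jump function describing argument 0 of the recursive call is a simple
pass-through of parameter 0, so this predicate returns true for it; with
SIMPLE set to false it would also accept an arithmetic pass-through such as
the one in fn (i + 1).  */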
| 5225 | |
| 5226 | static bool |
| 5227 | self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i, |
| 5228 | bool simple = true) |
| 5229 | { |
| 5230 | enum availability availability; |
| 5231 | if (cs->caller == cs->callee->function_symbol (avail: &availability) |
| 5232 | && availability > AVAIL_INTERPOSABLE |
| 5233 | && jfunc->type == IPA_JF_PASS_THROUGH |
| 5234 | && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) |
| 5235 | && ipa_get_jf_pass_through_formal_id (jfunc) == i |
| 5236 | && ipa_node_params_sum->get (node: cs->caller) |
| 5237 | && !ipa_node_params_sum->get (node: cs->caller)->ipcp_orig_node) |
| 5238 | return true; |
| 5239 | return false; |
| 5240 | } |
| 5241 | |
/* Return true if JFUNC, which describes a part of an aggregate represented or
pointed to by the i-th parameter of call CS, is a pass-through function to
itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE is
true, further check if JFUNC is a simple no-operation pass-through.  */
| 5247 | |
| 5248 | static bool |
| 5249 | self_recursive_agg_pass_through_p (const cgraph_edge *cs, |
| 5250 | const ipa_agg_jf_item *jfunc, |
| 5251 | int i, bool simple = true) |
| 5252 | { |
| 5253 | enum availability availability; |
| 5254 | if (cs->caller == cs->callee->function_symbol (avail: &availability) |
| 5255 | && availability > AVAIL_INTERPOSABLE |
| 5256 | && jfunc->jftype == IPA_JF_LOAD_AGG |
| 5257 | && jfunc->offset == jfunc->value.load_agg.offset |
| 5258 | && (!simple || jfunc->value.pass_through.operation == NOP_EXPR) |
| 5259 | && jfunc->value.pass_through.formal_id == i |
| 5260 | && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type) |
| 5261 | && ipa_node_params_sum->get (node: cs->caller) |
| 5262 | && !ipa_node_params_sum->get (node: cs->caller)->ipcp_orig_node) |
| 5263 | return true; |
| 5264 | return false; |
| 5265 | } |
| 5266 | |
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
KNOWN_CSTS with constants that are also known for all of the CALLERS.  */
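/* For example (hypothetical callers): if parameter 2 of NODE is not yet in
KNOWN_CSTS and every edge in CALLERS passes the constant 5 in that position,
KNOWN_CSTS[2] is set to 5.  If one caller passes 5 and another passes 7, or
any caller passes something unknown, the slot is left empty.  */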
| 5269 | |
| 5270 | static void |
| 5271 | find_more_scalar_values_for_callers_subset (struct cgraph_node *node, |
| 5272 | vec<tree> &known_csts, |
| 5273 | const vec<cgraph_edge *> &callers) |
| 5274 | { |
| 5275 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 5276 | int i, count = ipa_get_param_count (info); |
| 5277 | |
| 5278 | for (i = 0; i < count; i++) |
| 5279 | { |
| 5280 | struct cgraph_edge *cs; |
| 5281 | tree newval = NULL_TREE; |
| 5282 | int j; |
| 5283 | bool first = true; |
| 5284 | tree type = ipa_get_type (info, i); |
| 5285 | |
| 5286 | if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i]) |
| 5287 | continue; |
| 5288 | |
| 5289 | FOR_EACH_VEC_ELT (callers, j, cs) |
| 5290 | { |
| 5291 | struct ipa_jump_func *jump_func; |
| 5292 | tree t; |
| 5293 | |
| 5294 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 5295 | if (!args |
| 5296 | || i >= ipa_get_cs_argument_count (args) |
| 5297 | || (i == 0 |
| 5298 | && call_passes_through_thunk (cs))) |
| 5299 | { |
| 5300 | newval = NULL_TREE; |
| 5301 | break; |
| 5302 | } |
| 5303 | jump_func = ipa_get_ith_jump_func (args, i); |
| 5304 | |
| 5305 | /* Besides simple pass-through jump function, arithmetic jump |
| 5306 | function could also introduce argument-direct-pass-through for |
| 5307 | self-feeding recursive call. For example, |
| 5308 | |
| 5309 | fn (int i) |
| 5310 | { |
| 5311 | fn (i & 1); |
| 5312 | } |
| 5313 | |
| 5314 | Given that i is 0, recursive propagation via (i & 1) also gets |
| 5315 | 0. */ |
| 5316 | if (self_recursive_pass_through_p (cs, jfunc: jump_func, i, simple: false)) |
| 5317 | { |
| 5318 | gcc_assert (newval); |
| 5319 | enum tree_code opcode |
| 5320 | = ipa_get_jf_pass_through_operation (jfunc: jump_func); |
| 5321 | tree op_type = (opcode == NOP_EXPR) ? NULL_TREE |
| 5322 | : ipa_get_jf_pass_through_op_type (jfunc: jump_func); |
| 5323 | t = ipa_get_jf_arith_result (opcode, input: newval, |
| 5324 | operand: ipa_get_jf_pass_through_operand (jfunc: jump_func), |
| 5325 | res_type: op_type); |
| 5326 | t = ipacp_value_safe_for_type (param_type: type, value: t); |
| 5327 | } |
| 5328 | else |
| 5329 | t = ipa_value_from_jfunc (info: ipa_node_params_sum->get (node: cs->caller), |
| 5330 | jfunc: jump_func, parm_type: type); |
| 5331 | if (!t |
| 5332 | || (newval |
| 5333 | && !values_equal_for_ipcp_p (x: t, y: newval)) |
| 5334 | || (!first && !newval)) |
| 5335 | { |
| 5336 | newval = NULL_TREE; |
| 5337 | break; |
| 5338 | } |
| 5339 | else |
| 5340 | newval = t; |
| 5341 | first = false; |
| 5342 | } |
| 5343 | |
| 5344 | if (newval) |
| 5345 | { |
| 5346 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 5347 | { |
| 5348 | fprintf (stream: dump_file, format: " adding an extra known scalar value " ); |
| 5349 | print_ipcp_constant_value (f: dump_file, v: newval); |
| 5350 | fprintf (stream: dump_file, format: " for " ); |
| 5351 | ipa_dump_param (dump_file, info, i); |
| 5352 | fprintf (stream: dump_file, format: "\n" ); |
| 5353 | } |
| 5354 | |
| 5355 | known_csts[i] = newval; |
| 5356 | } |
| 5357 | } |
| 5358 | } |
| 5359 | |
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
CALLERS.  */
| 5363 | |
| 5364 | static void |
| 5365 | find_more_contexts_for_caller_subset (cgraph_node *node, |
| 5366 | vec<ipa_polymorphic_call_context> |
| 5367 | *known_contexts, |
| 5368 | const vec<cgraph_edge *> &callers) |
| 5369 | { |
| 5370 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 5371 | int i, count = ipa_get_param_count (info); |
| 5372 | |
| 5373 | for (i = 0; i < count; i++) |
| 5374 | { |
| 5375 | cgraph_edge *cs; |
| 5376 | |
| 5377 | if (ipa_get_poly_ctx_lat (info, i)->bottom |
| 5378 | || (known_contexts->exists () |
| 5379 | && !(*known_contexts)[i].useless_p ())) |
| 5380 | continue; |
| 5381 | |
| 5382 | ipa_polymorphic_call_context newval; |
| 5383 | bool first = true; |
| 5384 | int j; |
| 5385 | |
| 5386 | FOR_EACH_VEC_ELT (callers, j, cs) |
| 5387 | { |
| 5388 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 5389 | if (!args |
| 5390 | || i >= ipa_get_cs_argument_count (args)) |
| 5391 | return; |
| 5392 | ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i); |
| 5393 | ipa_polymorphic_call_context ctx; |
| 5394 | ctx = ipa_context_from_jfunc (info: ipa_node_params_sum->get (node: cs->caller), |
| 5395 | cs, csidx: i, jfunc); |
| 5396 | if (first) |
| 5397 | { |
| 5398 | newval = ctx; |
| 5399 | first = false; |
| 5400 | } |
| 5401 | else |
| 5402 | newval.meet_with (ctx); |
| 5403 | if (newval.useless_p ()) |
| 5404 | break; |
| 5405 | } |
| 5406 | |
| 5407 | if (!newval.useless_p ()) |
| 5408 | { |
| 5409 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 5410 | { |
| 5411 | fprintf (stream: dump_file, format: " adding an extra known polymorphic " |
| 5412 | "context " ); |
| 5413 | print_ipcp_constant_value (f: dump_file, v: newval); |
| 5414 | fprintf (stream: dump_file, format: " for " ); |
| 5415 | ipa_dump_param (dump_file, info, i); |
| 5416 | fprintf (stream: dump_file, format: "\n" ); |
| 5417 | } |
| 5418 | |
| 5419 | if (!known_contexts->exists ()) |
| 5420 | known_contexts->safe_grow_cleared (len: ipa_get_param_count (info), |
| 5421 | exact: true); |
| 5422 | (*known_contexts)[i] = newval; |
| 5423 | } |
| 5424 | |
| 5425 | } |
| 5426 | } |
| 5427 | |
| 5428 | /* Push all aggregate values coming along edge CS for parameter number INDEX to |
| 5429 | RES. If INTERIM is non-NULL, it contains the current interim state of |
| 5430 | collected aggregate values which can be used to compute values passed over |
| 5431 | self-recursive edges. |
| 5432 | |
This is basically one iteration of push_agg_values_from_edge over one
| 5434 | parameter, which allows for simpler early returns. */ |
| 5435 | |
| 5436 | static void |
| 5437 | push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index, |
| 5438 | vec<ipa_argagg_value> *res, |
| 5439 | const ipa_argagg_value_list *interim) |
| 5440 | { |
| 5441 | bool agg_values_from_caller = false; |
| 5442 | bool agg_jf_preserved = false; |
| 5443 | unsigned unit_delta = UINT_MAX; |
| 5444 | int src_idx = -1; |
| 5445 | ipa_jump_func *jfunc = ipa_get_ith_jump_func (args: ipa_edge_args_sum->get (edge: cs), |
| 5446 | i: index); |
| 5447 | |
| 5448 | if (jfunc->type == IPA_JF_PASS_THROUGH |
| 5449 | && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) |
| 5450 | { |
| 5451 | agg_values_from_caller = true; |
| 5452 | agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc); |
| 5453 | src_idx = ipa_get_jf_pass_through_formal_id (jfunc); |
| 5454 | unit_delta = 0; |
| 5455 | } |
| 5456 | else if (jfunc->type == IPA_JF_ANCESTOR |
| 5457 | && ipa_get_jf_ancestor_agg_preserved (jfunc)) |
| 5458 | { |
| 5459 | agg_values_from_caller = true; |
| 5460 | agg_jf_preserved = true; |
| 5461 | src_idx = ipa_get_jf_ancestor_formal_id (jfunc); |
| 5462 | unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT; |
| 5463 | } |
| 5464 | |
| 5465 | ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 5466 | if (agg_values_from_caller) |
| 5467 | { |
| 5468 | if (caller_info->ipcp_orig_node) |
| 5469 | { |
| 5470 | struct cgraph_node *orig_node = caller_info->ipcp_orig_node; |
| 5471 | ipcp_transformation *ts |
| 5472 | = ipcp_get_transformation_summary (node: cs->caller); |
| 5473 | ipa_node_params *orig_info = ipa_node_params_sum->get (node: orig_node); |
| 5474 | ipcp_param_lattices *orig_plats |
| 5475 | = ipa_get_parm_lattices (info: orig_info, i: src_idx); |
| 5476 | if (ts |
| 5477 | && orig_plats->aggs |
| 5478 | && (agg_jf_preserved || !orig_plats->aggs_by_ref)) |
| 5479 | { |
| 5480 | ipa_argagg_value_list src (ts); |
| 5481 | src.push_adjusted_values (src_index: src_idx, dest_index: index, unit_delta, res); |
| 5482 | return; |
| 5483 | } |
| 5484 | } |
| 5485 | else |
| 5486 | { |
| 5487 | ipcp_param_lattices *src_plats |
| 5488 | = ipa_get_parm_lattices (info: caller_info, i: src_idx); |
| 5489 | if (src_plats->aggs |
| 5490 | && !src_plats->aggs_bottom |
| 5491 | && (agg_jf_preserved || !src_plats->aggs_by_ref)) |
| 5492 | { |
| 5493 | if (interim && self_recursive_pass_through_p (cs, jfunc, i: index)) |
| 5494 | { |
| 5495 | interim->push_adjusted_values (src_index: src_idx, dest_index: index, unit_delta, |
| 5496 | res); |
| 5497 | return; |
| 5498 | } |
| 5499 | if (!src_plats->aggs_contain_variable) |
| 5500 | { |
| 5501 | push_agg_values_from_plats (plats: src_plats, dest_index: index, unit_delta, |
| 5502 | res); |
| 5503 | return; |
| 5504 | } |
| 5505 | } |
| 5506 | } |
| 5507 | } |
| 5508 | |
| 5509 | if (!jfunc->agg.items) |
| 5510 | return; |
| 5511 | bool first = true; |
| 5512 | unsigned prev_unit_offset = 0; |
| 5513 | for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items) |
| 5514 | { |
| 5515 | tree value, srcvalue; |
| 5516 | /* Besides simple pass-through aggregate jump function, arithmetic |
| 5517 | aggregate jump function could also bring same aggregate value as |
| 5518 | parameter passed-in for self-feeding recursive call. For example, |
| 5519 | |
| 5520 | fn (int *i) |
| 5521 | { |
| 5522 | int j = *i & 1; |
| 5523 | fn (&j); |
| 5524 | } |
| 5525 | |
| 5526 | Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */ |
| 5527 | if (interim |
| 5528 | && self_recursive_agg_pass_through_p (cs, jfunc: &agg_jf, i: index, simple: false) |
| 5529 | && (srcvalue = interim->get_value(index, |
| 5530 | unit_offset: agg_jf.offset / BITS_PER_UNIT))) |
| 5531 | { |
| 5532 | value = ipa_get_jf_arith_result (opcode: agg_jf.value.pass_through.operation, |
| 5533 | input: srcvalue, |
| 5534 | operand: agg_jf.value.pass_through.operand, |
| 5535 | res_type: agg_jf.value.pass_through.op_type); |
| 5536 | value = ipacp_value_safe_for_type (param_type: agg_jf.type, value); |
| 5537 | } |
| 5538 | else |
| 5539 | value = ipa_agg_value_from_jfunc (info: caller_info, node: cs->caller, |
| 5540 | item: &agg_jf); |
| 5541 | if (value) |
| 5542 | { |
| 5543 | struct ipa_argagg_value iav; |
| 5544 | iav.value = value; |
| 5545 | iav.unit_offset = agg_jf.offset / BITS_PER_UNIT; |
| 5546 | iav.index = index; |
| 5547 | iav.by_ref = jfunc->agg.by_ref; |
| 5548 | iav.killed = false; |
| 5549 | |
| 5550 | gcc_assert (first |
| 5551 | || iav.unit_offset > prev_unit_offset); |
| 5552 | prev_unit_offset = iav.unit_offset; |
| 5553 | first = false; |
| 5554 | |
| 5555 | res->safe_push (obj: iav); |
| 5556 | } |
| 5557 | } |
| 5558 | return; |
| 5559 | } |
| 5560 | |
| 5561 | /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the |
| 5562 | description of ultimate callee of CS or the one it was cloned from (the |
| 5563 | summary where lattices are). If INTERIM is non-NULL, it contains the |
| 5564 | current interim state of collected aggregate values which can be used to |
| 5565 | compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION |
| 5566 | is true) and to skip values which clearly will not be part of intersection |
| 5567 | with INTERIM. */ |
| 5568 | |
| 5569 | static void |
| 5570 | push_agg_values_from_edge (struct cgraph_edge *cs, |
| 5571 | ipa_node_params *dest_info, |
| 5572 | vec<ipa_argagg_value> *res, |
| 5573 | const ipa_argagg_value_list *interim, |
| 5574 | bool optimize_self_recursion) |
| 5575 | { |
| 5576 | ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs); |
| 5577 | if (!args) |
| 5578 | return; |
| 5579 | |
| 5580 | int count = MIN (ipa_get_param_count (dest_info), |
| 5581 | ipa_get_cs_argument_count (args)); |
| 5582 | |
| 5583 | unsigned interim_index = 0; |
| 5584 | for (int index = 0; index < count; index++) |
| 5585 | { |
| 5586 | if (interim) |
| 5587 | { |
| 5588 | while (interim_index < interim->m_elts.size () |
| 5589 | && interim->m_elts[interim_index].value |
| 5590 | && interim->m_elts[interim_index].index < index) |
| 5591 | interim_index++; |
| 5592 | if (interim_index >= interim->m_elts.size () |
| 5593 | || interim->m_elts[interim_index].index > index) |
| 5594 | continue; |
| 5595 | } |
| 5596 | |
| 5597 | ipcp_param_lattices *plats = ipa_get_parm_lattices (info: dest_info, i: index); |
| 5598 | if (!ipa_is_param_used (info: dest_info, i: index) |
| 5599 | || plats->aggs_bottom) |
| 5600 | continue; |
| 5601 | push_agg_values_for_index_from_edge (cs, index, res, |
| 5602 | interim: optimize_self_recursion ? interim |
| 5603 | : NULL); |
| 5604 | } |
| 5605 | } |
| 5606 | |
| 5607 | |
| 5608 | /* Look at edges in CALLERS and collect all known aggregate values that arrive |
| 5609 | from all of them. Return nullptr if there are none. */ |
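/* For example (hypothetical values): if one caller is known to pass an
aggregate with 1 at unit offset 0 and 2 at unit offset 8 in parameter 0,
while another passes 1 at offset 0 and 3 at offset 8, only the
<param 0, offset 0, value 1> entry survives the intersection and is returned;
if nothing survives, the result is nullptr.  */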
| 5610 | |
| 5611 | static struct vec<ipa_argagg_value, va_gc> * |
| 5612 | find_aggregate_values_for_callers_subset (struct cgraph_node *node, |
| 5613 | const vec<cgraph_edge *> &callers) |
| 5614 | { |
| 5615 | ipa_node_params *dest_info = ipa_node_params_sum->get (node); |
| 5616 | if (dest_info->ipcp_orig_node) |
| 5617 | dest_info = ipa_node_params_sum->get (node: dest_info->ipcp_orig_node); |
| 5618 | |
| 5619 | /* gather_edges_for_value puts a non-recursive call into the first element of |
| 5620 | callers if it can. */ |
| 5621 | auto_vec<ipa_argagg_value, 32> interim; |
| 5622 | push_agg_values_from_edge (cs: callers[0], dest_info, res: &interim, NULL, optimize_self_recursion: true); |
| 5623 | |
| 5624 | unsigned valid_entries = interim.length (); |
| 5625 | if (!valid_entries) |
| 5626 | return nullptr; |
| 5627 | |
| 5628 | unsigned caller_count = callers.length(); |
| 5629 | for (unsigned i = 1; i < caller_count; i++) |
| 5630 | { |
| 5631 | auto_vec<ipa_argagg_value, 32> last; |
| 5632 | ipa_argagg_value_list avs (&interim); |
| 5633 | push_agg_values_from_edge (cs: callers[i], dest_info, res: &last, interim: &avs, optimize_self_recursion: true); |
| 5634 | |
| 5635 | valid_entries = intersect_argaggs_with (elts&: interim, other: last); |
| 5636 | if (!valid_entries) |
| 5637 | return nullptr; |
| 5638 | } |
| 5639 | |
| 5640 | vec<ipa_argagg_value, va_gc> *res = NULL; |
| 5641 | vec_safe_reserve_exact (v&: res, nelems: valid_entries); |
| 5642 | for (const ipa_argagg_value &av : interim) |
| 5643 | if (av.value) |
| 5644 | res->quick_push(obj: av); |
| 5645 | gcc_checking_assert (res->length () == valid_entries); |
| 5646 | return res; |
| 5647 | } |
| 5648 | |
| 5649 | /* Determine whether CS also brings all scalar values that the NODE is |
| 5650 | specialized for. */ |
| 5651 | |
| 5652 | static bool |
| 5653 | cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, |
| 5654 | struct cgraph_node *node) |
| 5655 | { |
| 5656 | ipa_node_params *dest_info = ipa_node_params_sum->get (node); |
| 5657 | int count = ipa_get_param_count (info: dest_info); |
| 5658 | class ipa_node_params *caller_info; |
| 5659 | class ipa_edge_args *args; |
| 5660 | int i; |
| 5661 | |
| 5662 | caller_info = ipa_node_params_sum->get (node: cs->caller); |
| 5663 | args = ipa_edge_args_sum->get (edge: cs); |
| 5664 | for (i = 0; i < count; i++) |
| 5665 | { |
| 5666 | struct ipa_jump_func *jump_func; |
| 5667 | tree val, t; |
| 5668 | |
| 5669 | val = dest_info->known_csts[i]; |
| 5670 | if (!val) |
| 5671 | continue; |
| 5672 | |
| 5673 | if (i >= ipa_get_cs_argument_count (args)) |
| 5674 | return false; |
| 5675 | jump_func = ipa_get_ith_jump_func (args, i); |
| 5676 | t = ipa_value_from_jfunc (info: caller_info, jfunc: jump_func, |
| 5677 | parm_type: ipa_get_type (info: dest_info, i)); |
| 5678 | if (!t || !values_equal_for_ipcp_p (x: val, y: t)) |
| 5679 | return false; |
| 5680 | } |
| 5681 | return true; |
| 5682 | } |
| 5683 | |
| 5684 | /* Determine whether CS also brings all aggregate values that NODE is |
| 5685 | specialized for. */ |
| 5686 | |
| 5687 | static bool |
| 5688 | cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, |
| 5689 | struct cgraph_node *node) |
| 5690 | { |
| 5691 | ipcp_transformation *ts = ipcp_get_transformation_summary (node); |
| 5692 | if (!ts || vec_safe_is_empty (v: ts->m_agg_values)) |
| 5693 | return true; |
| 5694 | |
| 5695 | const ipa_argagg_value_list existing (ts->m_agg_values); |
| 5696 | auto_vec<ipa_argagg_value, 32> edge_values; |
| 5697 | ipa_node_params *dest_info = ipa_node_params_sum->get (node); |
| 5698 | gcc_checking_assert (dest_info->ipcp_orig_node); |
| 5699 | dest_info = ipa_node_params_sum->get (node: dest_info->ipcp_orig_node); |
| 5700 | push_agg_values_from_edge (cs, dest_info, res: &edge_values, interim: &existing, optimize_self_recursion: false); |
| 5701 | const ipa_argagg_value_list avl (&edge_values); |
| 5702 | return avl.superset_of_p (other: existing); |
| 5703 | } |
| 5704 | |
| 5705 | /* Given an original NODE and a VAL for which we have already created a |
| 5706 | specialized clone, look whether there are incoming edges that still lead |
| 5707 | into the old node but now also bring the requested value and also conform to |
all other criteria so that they can be redirected to the specialized node.
| 5709 | This function can therefore redirect the final edge in a SCC. */ |
| 5710 | |
| 5711 | template <typename valtype> |
| 5712 | static void |
| 5713 | perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val) |
| 5714 | { |
| 5715 | ipcp_value_source<valtype> *src; |
| 5716 | profile_count redirected_sum = profile_count::zero (); |
| 5717 | |
| 5718 | for (src = val->sources; src; src = src->next) |
| 5719 | { |
| 5720 | struct cgraph_edge *cs = src->cs; |
| 5721 | while (cs) |
| 5722 | { |
| 5723 | if (cgraph_edge_brings_value_p (cs, src, node, val) |
| 5724 | && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node) |
| 5725 | && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node)) |
| 5726 | { |
| 5727 | if (dump_file) |
| 5728 | fprintf (dump_file, " - adding an extra caller %s of %s\n" , |
| 5729 | cs->caller->dump_name (), |
| 5730 | val->spec_node->dump_name ()); |
| 5731 | |
| 5732 | cs->redirect_callee_duplicating_thunks (n: val->spec_node); |
| 5733 | val->spec_node->expand_all_artificial_thunks (); |
| 5734 | if (cs->count.ipa ().initialized_p ()) |
| 5735 | redirected_sum = redirected_sum + cs->count.ipa (); |
| 5736 | } |
| 5737 | cs = get_next_cgraph_edge_clone (cs); |
| 5738 | } |
| 5739 | } |
| 5740 | |
| 5741 | if (redirected_sum.nonzero_p ()) |
| 5742 | update_specialized_profile (val->spec_node, node, redirected_sum); |
| 5743 | } |
| 5744 | |
| 5745 | /* Return true if KNOWN_CONTEXTS contain at least one useful context. */ |
| 5746 | |
| 5747 | static bool |
| 5748 | known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts) |
| 5749 | { |
| 5750 | ipa_polymorphic_call_context *ctx; |
| 5751 | int i; |
| 5752 | |
| 5753 | FOR_EACH_VEC_ELT (known_contexts, i, ctx) |
| 5754 | if (!ctx->useless_p ()) |
| 5755 | return true; |
| 5756 | return false; |
| 5757 | } |
| 5758 | |
/* Return a copy of KNOWN_CONTEXTS if at least one context in it is useful,
otherwise return vNULL.  */
| 5760 | |
| 5761 | static vec<ipa_polymorphic_call_context> |
| 5762 | copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts) |
| 5763 | { |
| 5764 | if (known_contexts_useful_p (known_contexts)) |
| 5765 | return known_contexts.copy (); |
| 5766 | else |
| 5767 | return vNULL; |
| 5768 | } |
| 5769 | |
| 5770 | /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy |
| 5771 | according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its |
| 5772 | copy too. */ |
| 5773 | |
| 5774 | static void |
| 5775 | copy_known_vectors_add_val (ipa_auto_call_arg_values *avals, |
| 5776 | vec<tree> *known_csts, |
| 5777 | vec<ipa_polymorphic_call_context> *known_contexts, |
| 5778 | ipcp_value<tree> *val, int index) |
| 5779 | { |
| 5780 | *known_csts = avals->m_known_vals.copy (); |
*known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
| 5782 | (*known_csts)[index] = val->value; |
| 5783 | } |
| 5784 | |
| 5785 | /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy |
| 5786 | contexts to KNOWN_CONTEXTS and modify the copy according to VAL and |
| 5787 | INDEX. */ |
| 5788 | |
| 5789 | static void |
| 5790 | copy_known_vectors_add_val (ipa_auto_call_arg_values *avals, |
| 5791 | vec<tree> *known_csts, |
| 5792 | vec<ipa_polymorphic_call_context> *known_contexts, |
| 5793 | ipcp_value<ipa_polymorphic_call_context> *val, |
| 5794 | int index) |
| 5795 | { |
| 5796 | *known_csts = avals->m_known_vals.copy (); |
| 5797 | *known_contexts = avals->m_known_contexts.copy (); |
| 5798 | (*known_contexts)[index] = val->value; |
| 5799 | } |
| 5800 | |
| 5801 | /* Return true if OFFSET indicates this was not an aggregate value or there is |
| 5802 | a replacement equivalent to VALUE, INDEX and OFFSET among those in the |
| 5803 | AGGVALS list. */ |
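
/* For example, a scalar replacement (OFFSET of -1) is always acceptable,
whereas a value of 4 meant for bit offset 64 of the parameter with INDEX 1
requires AGGVALS to contain an entry with index 1, unit offset 8 (assuming
the usual 8-bit units) and a value equal to 4. Illustrative numbers only. */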
| 5804 | |
| 5805 | DEBUG_FUNCTION bool |
| 5806 | ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals, |
| 5807 | int index, HOST_WIDE_INT offset, tree value) |
| 5808 | { |
| 5809 | if (offset == -1) |
| 5810 | return true; |
| 5811 | |
| 5812 | const ipa_argagg_value_list avl (aggvals); |
tree v = avl.get_value (index, offset / BITS_PER_UNIT);
return v && values_equal_for_ipcp_p (v, value);
| 5815 | } |
| 5816 | |
/* Return true if OFFSET is minus one because the source of a polymorphic
context cannot be an aggregate value. */
| 5819 | |
| 5820 | DEBUG_FUNCTION bool |
| 5821 | ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *, |
| 5822 | int , HOST_WIDE_INT offset, |
| 5823 | ipa_polymorphic_call_context) |
| 5824 | { |
| 5825 | return offset == -1; |
| 5826 | } |
| 5827 | |
| 5828 | /* Decide whether to create a special version of NODE for value VAL of |
| 5829 | parameter at the given INDEX. If OFFSET is -1, the value is for the |
| 5830 | parameter itself, otherwise it is stored at the given OFFSET of the |
| 5831 | parameter. AVALS describes the other already known values. SELF_GEN_CLONES |
| 5832 | is a vector which contains clones created for self-recursive calls with an |
| 5833 | arithmetic pass-through jump function. */ |
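
/* A minimal made-up example of the decision taken here:

static int scale (int x, int factor) { return x * factor; }
int a (int x) { return scale (x, 2); }
int b (int x) { return scale (x, 2); }
int c (int x, int f) { return scale (x, f); }

The lattice of FACTOR contains the constant 2 coming from the calls in a
and b. This function estimates whether a clone of scale with FACTOR
replaced by 2 pays for its size cost; if it does, the clone is created and
the calls in a and b are redirected to it, while c keeps calling the
original. */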
| 5834 | |
| 5835 | template <typename valtype> |
| 5836 | static bool |
| 5837 | decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, |
| 5838 | ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals, |
| 5839 | vec<cgraph_node *> *self_gen_clones) |
| 5840 | { |
| 5841 | int caller_count; |
| 5842 | sreal freq_sum; |
| 5843 | profile_count count_sum, rec_count_sum; |
| 5844 | vec<cgraph_edge *> callers; |
| 5845 | bool called_without_ipa_profile; |
| 5846 | |
| 5847 | if (val->spec_node) |
| 5848 | { |
| 5849 | perhaps_add_new_callers (node, val); |
| 5850 | return false; |
| 5851 | } |
| 5852 | else if (val->local_size_cost + overall_size > get_max_overall_size (node)) |
| 5853 | { |
| 5854 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fprintf (dump_file, " Ignoring candidate value because "
"maximum unit size would be reached with %li.\n",
| 5857 | val->local_size_cost + overall_size); |
| 5858 | return false; |
| 5859 | } |
| 5860 | else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count, |
| 5861 | &rec_count_sum, &count_sum, |
| 5862 | &called_without_ipa_profile)) |
| 5863 | return false; |
| 5864 | |
if (!dbg_cnt (ipa_cp_values))
| 5866 | return false; |
| 5867 | |
| 5868 | if (val->self_recursion_generated_p ()) |
| 5869 | { |
/* The edge counts in this case might not have been adjusted yet.
Nevertheless, even if they were, it would only be guesswork which we
can do now. The recursive part of the counts can be derived from the
count of the original node anyway. */
| 5874 | if (node->count.ipa ().nonzero_p ()) |
| 5875 | { |
| 5876 | unsigned dem = self_gen_clones->length () + 1; |
| 5877 | rec_count_sum = node->count.ipa () / dem; |
| 5878 | } |
| 5879 | else |
| 5880 | rec_count_sum = profile_count::zero (); |
| 5881 | } |
| 5882 | |
| 5883 | /* get_info_about_necessary_edges only sums up ipa counts. */ |
| 5884 | count_sum += rec_count_sum; |
| 5885 | |
| 5886 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 5887 | { |
fprintf (dump_file, " - considering value ");
print_ipcp_constant_value (dump_file, val->value);
fprintf (dump_file, " for ");
ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
if (offset != -1)
fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
fprintf (dump_file, " (caller_count: %i)\n", caller_count);
| 5895 | } |
| 5896 | |
| 5897 | if (!good_cloning_opportunity_p (node, val->local_time_benefit, |
| 5898 | freq_sum, count_sum, |
| 5899 | val->local_size_cost, |
| 5900 | called_without_ipa_profile) |
| 5901 | && !good_cloning_opportunity_p (node, val->prop_time_benefit, |
| 5902 | freq_sum, count_sum, val->prop_size_cost, |
| 5903 | called_without_ipa_profile)) |
| 5904 | return false; |
| 5905 | |
| 5906 | if (dump_file) |
fprintf (dump_file, " Creating a specialized node of %s.\n",
| 5908 | node->dump_name ()); |
| 5909 | |
| 5910 | vec<tree> known_csts; |
| 5911 | vec<ipa_polymorphic_call_context> known_contexts; |
| 5912 | |
| 5913 | callers = gather_edges_for_value (val, node, caller_count); |
| 5914 | if (offset == -1) |
| 5915 | copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index); |
| 5916 | else |
| 5917 | { |
| 5918 | known_csts = avals->m_known_vals.copy (); |
known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
}
find_more_scalar_values_for_callers_subset (node, known_csts, callers);
find_more_contexts_for_caller_subset (node, &known_contexts, callers);
| 5923 | vec<ipa_argagg_value, va_gc> *aggvals |
| 5924 | = find_aggregate_values_for_callers_subset (node, callers); |
| 5925 | gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index, |
| 5926 | offset, val->value)); |
| 5927 | val->spec_node = create_specialized_node (node, known_csts, known_contexts, |
| 5928 | aggvals, callers); |
| 5929 | |
| 5930 | if (val->self_recursion_generated_p ()) |
self_gen_clones->safe_push (val->spec_node);
| 5932 | else |
| 5933 | update_profiling_info (node, val->spec_node); |
| 5934 | |
| 5935 | callers.release (); |
| 5936 | overall_size += val->local_size_cost; |
| 5937 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fprintf (dump_file, " overall size reached %li\n",
| 5939 | overall_size); |
| 5940 | |
| 5941 | /* TODO: If for some lattice there is only one other known value |
| 5942 | left, make a special node for it too. */ |
| 5943 | |
| 5944 | return true; |
| 5945 | } |
| 5946 | |
| 5947 | /* Like irange::contains_p(), but convert VAL to the range of R if |
| 5948 | necessary. */ |
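
/* For instance, if R describes [0, 100] for an unsigned char parameter, an
int VAL of 300 does not fit R's type and the answer is false, whereas an
int VAL of 42 is first converted to unsigned char and then tested for
membership. Illustrative values only. */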
| 5949 | |
| 5950 | static inline bool |
| 5951 | ipa_range_contains_p (const vrange &r, tree val) |
| 5952 | { |
| 5953 | if (r.undefined_p ()) |
| 5954 | return false; |
| 5955 | |
| 5956 | tree type = r.type (); |
if (!wi::fits_to_tree_p (wi::to_wide (val), type))
return false;

val = fold_convert (type, val);
return r.contains_p (val);
| 5962 | } |
| 5963 | |
| 5964 | /* Decide whether and what specialized clones of NODE should be created. */ |
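
/* A made-up sketch of the three kinds of lattices the loop below examines
for each parameter of NODE:

struct Base;
struct S { int field; };
static int callee (int k, struct S *agg, struct Base *obj);

- the scalar lattice of K holds constants passed directly (say k == 4),
- the aggregate lattices of AGG hold constants known to sit at particular
offsets of the memory it points to (say agg->field == 7),
- the context lattice of OBJ holds polymorphic call contexts which may
allow devirtualizing calls made through OBJ in a clone. */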
| 5965 | |
| 5966 | static bool |
| 5967 | decide_whether_version_node (struct cgraph_node *node) |
| 5968 | { |
| 5969 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 5970 | int i, count = ipa_get_param_count (info); |
| 5971 | bool ret = false; |
| 5972 | |
| 5973 | if (count == 0) |
| 5974 | return false; |
| 5975 | |
| 5976 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
| 5978 | node->dump_name ()); |
| 5979 | |
| 5980 | auto_vec <cgraph_node *, 9> self_gen_clones; |
| 5981 | ipa_auto_call_arg_values avals; |
gather_context_independent_values (info, &avals, false, NULL);
| 5983 | |
for (i = 0; i < count; i++)
| 5985 | { |
| 5986 | if (!ipa_is_param_used (info, i)) |
| 5987 | continue; |
| 5988 | |
| 5989 | class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 5990 | ipcp_lattice<tree> *lat = &plats->itself; |
| 5991 | ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; |
| 5992 | |
| 5993 | if (!lat->bottom |
| 5994 | && !avals.m_known_vals[i]) |
| 5995 | { |
| 5996 | ipcp_value<tree> *val; |
| 5997 | for (val = lat->values; val; val = val->next) |
| 5998 | { |
| 5999 | /* If some values generated for self-recursive calls with |
| 6000 | arithmetic jump functions fall outside of the known |
| 6001 | range for the parameter, we can skip them. */ |
| 6002 | if (TREE_CODE (val->value) == INTEGER_CST |
| 6003 | && !plats->m_value_range.bottom_p () |
&& !ipa_range_contains_p (plats->m_value_range.m_vr,
val->value))
| 6006 | { |
/* This can also happen if a constant present in the source
code falls outside of the range of the parameter's type, so we
cannot assert. */
| 6010 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 6011 | { |
fprintf (dump_file, " - skipping%s value ",
val->self_recursion_generated_p ()
? " self_recursion_generated" : "");
print_ipcp_constant_value (dump_file, val->value);
fprintf (dump_file, " because it is outside known "
"value range.\n");
| 6018 | } |
| 6019 | continue; |
| 6020 | } |
ret |= decide_about_value (node, i, -1, val, &avals,
&self_gen_clones);
| 6023 | } |
| 6024 | } |
| 6025 | |
| 6026 | if (!plats->aggs_bottom) |
| 6027 | { |
| 6028 | struct ipcp_agg_lattice *aglat; |
| 6029 | ipcp_value<tree> *val; |
| 6030 | for (aglat = plats->aggs; aglat; aglat = aglat->next) |
| 6031 | if (!aglat->bottom && aglat->values |
| 6032 | /* If the following is false, the one value has been considered |
| 6033 | for cloning for all contexts. */ |
| 6034 | && (plats->aggs_contain_variable |
| 6035 | || !aglat->is_single_const ())) |
| 6036 | for (val = aglat->values; val; val = val->next) |
ret |= decide_about_value (node, i, aglat->offset, val, &avals,
&self_gen_clones);
| 6039 | } |
| 6040 | |
| 6041 | if (!ctxlat->bottom |
| 6042 | && avals.m_known_contexts[i].useless_p ()) |
| 6043 | { |
| 6044 | ipcp_value<ipa_polymorphic_call_context> *val; |
| 6045 | for (val = ctxlat->values; val; val = val->next) |
ret |= decide_about_value (node, i, -1, val, &avals,
&self_gen_clones);
| 6048 | } |
| 6049 | } |
| 6050 | |
| 6051 | if (!self_gen_clones.is_empty ()) |
| 6052 | { |
self_gen_clones.safe_push (node);
update_counts_for_self_gen_clones (node, self_gen_clones);
| 6055 | } |
| 6056 | |
| 6057 | if (info->do_clone_for_all_contexts) |
| 6058 | { |
if (!dbg_cnt (ipa_cp_values))
| 6060 | { |
| 6061 | info->do_clone_for_all_contexts = false; |
| 6062 | return ret; |
| 6063 | } |
| 6064 | |
| 6065 | struct cgraph_node *clone; |
| 6066 | auto_vec<cgraph_edge *> callers = node->collect_callers (); |
| 6067 | |
| 6068 | for (int i = callers.length () - 1; i >= 0; i--) |
| 6069 | { |
| 6070 | cgraph_edge *cs = callers[i]; |
ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

if (caller_info && caller_info->node_dead)
callers.unordered_remove (i);
| 6075 | } |
| 6076 | |
| 6077 | if (!adjust_callers_for_value_intersection (callers, node)) |
| 6078 | { |
/* If node is not called by anyone, or all its caller edges are
self-recursive, the node is not really in use and there is no need
to do any cloning. */
| 6082 | info->do_clone_for_all_contexts = false; |
| 6083 | return ret; |
| 6084 | } |
| 6085 | |
| 6086 | if (dump_file) |
fprintf (dump_file, " - Creating a specialized node of %s "
"for all known contexts.\n", node->dump_name ());
| 6089 | |
| 6090 | vec<tree> known_csts = avals.m_known_vals.copy (); |
| 6091 | vec<ipa_polymorphic_call_context> known_contexts |
= copy_useful_known_contexts (avals.m_known_contexts);
find_more_scalar_values_for_callers_subset (node, known_csts, callers);
find_more_contexts_for_caller_subset (node, &known_contexts, callers);
| 6095 | vec<ipa_argagg_value, va_gc> *aggvals |
| 6096 | = find_aggregate_values_for_callers_subset (node, callers); |
| 6097 | |
| 6098 | if (!known_contexts_useful_p (known_contexts)) |
| 6099 | { |
| 6100 | known_contexts.release (); |
| 6101 | known_contexts = vNULL; |
| 6102 | } |
| 6103 | clone = create_specialized_node (node, known_csts, known_contexts, |
| 6104 | aggvals, callers); |
| 6105 | info->do_clone_for_all_contexts = false; |
ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
| 6107 | ret = true; |
| 6108 | } |
| 6109 | |
| 6110 | return ret; |
| 6111 | } |
| 6112 | |
| 6113 | /* Transitively mark all callees of NODE within the same SCC as not dead. */ |
| 6114 | |
| 6115 | static void |
| 6116 | spread_undeadness (struct cgraph_node *node) |
| 6117 | { |
| 6118 | struct cgraph_edge *cs; |
| 6119 | |
| 6120 | for (cs = node->callees; cs; cs = cs->next_callee) |
| 6121 | if (ipa_edge_within_scc (cs)) |
| 6122 | { |
| 6123 | struct cgraph_node *callee; |
| 6124 | class ipa_node_params *info; |
| 6125 | |
| 6126 | callee = cs->callee->function_symbol (NULL); |
info = ipa_node_params_sum->get (callee);

if (info && info->node_dead)
{
info->node_dead = 0;
spread_undeadness (callee);
| 6133 | } |
| 6134 | } |
| 6135 | } |
| 6136 | |
| 6137 | /* Return true if NODE has a caller from outside of its SCC that is not |
| 6138 | dead. Worker callback for cgraph_for_node_and_aliases. */ |
| 6139 | |
| 6140 | static bool |
| 6141 | has_undead_caller_from_outside_scc_p (struct cgraph_node *node, |
| 6142 | void *data ATTRIBUTE_UNUSED) |
| 6143 | { |
| 6144 | struct cgraph_edge *cs; |
| 6145 | |
| 6146 | for (cs = node->callers; cs; cs = cs->next_caller) |
| 6147 | if (cs->caller->thunk |
&& cs->caller->call_for_symbol_thunks_and_aliases
(has_undead_caller_from_outside_scc_p, NULL, true))
| 6150 | return true; |
| 6151 | else if (!ipa_edge_within_scc (cs)) |
| 6152 | { |
ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
if (!caller_info /* Unoptimized callers are like dead ones. */
| 6155 | || !caller_info->node_dead) |
| 6156 | return true; |
| 6157 | } |
| 6158 | return false; |
| 6159 | } |
| 6160 | |
| 6161 | |
| 6162 | /* Identify nodes within the same SCC as NODE which are no longer needed |
| 6163 | because of new clones and will be removed as unreachable. */ |
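
/* Made-up illustration: if every call to

static int helper (int v) { return v + 1; }

passed the constant 9 and all of those calls were redirected to the newly
created clone, the original helper has no undead caller left outside its
SCC, is marked node_dead here and will be removed as unreachable. */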
| 6164 | |
| 6165 | static void |
| 6166 | identify_dead_nodes (struct cgraph_node *node) |
| 6167 | { |
| 6168 | struct cgraph_node *v; |
| 6169 | for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) |
| 6170 | if (v->local) |
| 6171 | { |
ipa_node_params *info = ipa_node_params_sum->get (v);
if (info
&& !v->call_for_symbol_thunks_and_aliases
(has_undead_caller_from_outside_scc_p, NULL, true))
| 6176 | info->node_dead = 1; |
| 6177 | } |
| 6178 | |
| 6179 | for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) |
| 6180 | { |
ipa_node_params *info = ipa_node_params_sum->get (v);
if (info && !info->node_dead)
spread_undeadness (v);
| 6184 | } |
| 6185 | |
| 6186 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 6187 | { |
| 6188 | for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) |
if (ipa_node_params_sum->get (v)
&& ipa_node_params_sum->get (v)->node_dead)
fprintf (dump_file, " Marking node as dead: %s.\n",
| 6192 | v->dump_name ()); |
| 6193 | } |
| 6194 | } |
| 6195 | |
| 6196 | /* The decision stage. Iterate over the topological order of call graph nodes |
| 6197 | TOPO and make specialized clones if deemed beneficial. */ |
| 6198 | |
| 6199 | static void |
| 6200 | ipcp_decision_stage (class ipa_topo_info *topo) |
| 6201 | { |
| 6202 | int i; |
| 6203 | |
| 6204 | if (dump_file) |
fprintf (dump_file, "\nIPA decision stage:\n\n");
| 6206 | |
| 6207 | for (i = topo->nnodes - 1; i >= 0; i--) |
| 6208 | { |
| 6209 | struct cgraph_node *node = topo->order[i]; |
| 6210 | bool change = false, iterate = true; |
| 6211 | |
| 6212 | while (iterate) |
| 6213 | { |
| 6214 | struct cgraph_node *v; |
| 6215 | iterate = false; |
| 6216 | for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) |
| 6217 | if (v->has_gimple_body_p () |
&& ipcp_versionable_function_p (v))
iterate |= decide_whether_version_node (v);
| 6220 | |
| 6221 | change |= iterate; |
| 6222 | } |
| 6223 | if (change) |
| 6224 | identify_dead_nodes (node); |
| 6225 | } |
| 6226 | } |
| 6227 | |
| 6228 | /* Look up all VR and bits information that we have discovered and copy it |
| 6229 | over to the transformation summary. */ |
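
/* Illustrative sketch with made-up values: if propagation found that an int
parameter always lies in [0, 100] and that its two low bits are zero, the
ipa_vr pushed below combines the range with that bitmask, which may later
allow optimization of the function body to fold a test such as (p & 3) == 0. */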
| 6230 | |
| 6231 | static void |
| 6232 | ipcp_store_vr_results (void) |
| 6233 | { |
| 6234 | cgraph_node *node; |
| 6235 | |
| 6236 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| 6237 | { |
| 6238 | ipa_node_params *info = ipa_node_params_sum->get (node); |
| 6239 | bool dumped_sth = false; |
| 6240 | bool found_useful_result = false; |
| 6241 | bool do_vr = true; |
| 6242 | bool do_bits = true; |
| 6243 | |
| 6244 | if (!info || !opt_for_fn (node->decl, flag_ipa_vrp)) |
| 6245 | { |
| 6246 | if (dump_file) |
fprintf (dump_file, "Not considering %s for VR discovery "
"and propagation; -fipa-vrp: disabled.\n",
| 6249 | node->dump_name ()); |
| 6250 | do_vr = false; |
| 6251 | } |
| 6252 | if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp)) |
| 6253 | { |
| 6254 | if (dump_file) |
fprintf (dump_file, "Not considering %s for ipa bitwise "
"propagation; -fipa-bit-cp: disabled.\n",
| 6257 | node->dump_name ()); |
| 6258 | do_bits = false; |
| 6259 | } |
| 6260 | if (!do_bits && !do_vr) |
| 6261 | continue; |
| 6262 | |
| 6263 | if (info->ipcp_orig_node) |
info = ipa_node_params_sum->get (info->ipcp_orig_node);
| 6265 | if (info->lattices.is_empty ()) |
| 6266 | /* Newly expanded artificial thunks do not have lattices. */ |
| 6267 | continue; |
| 6268 | |
| 6269 | unsigned count = ipa_get_param_count (info); |
| 6270 | for (unsigned i = 0; i < count; i++) |
| 6271 | { |
| 6272 | ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 6273 | if (do_vr |
| 6274 | && !plats->m_value_range.bottom_p () |
| 6275 | && !plats->m_value_range.top_p ()) |
| 6276 | { |
| 6277 | found_useful_result = true; |
| 6278 | break; |
| 6279 | } |
| 6280 | if (do_bits && plats->bits_lattice.constant_p ()) |
| 6281 | { |
| 6282 | found_useful_result = true; |
| 6283 | break; |
| 6284 | } |
| 6285 | } |
| 6286 | if (!found_useful_result) |
| 6287 | continue; |
| 6288 | |
| 6289 | ipcp_transformation_initialize (); |
| 6290 | ipcp_transformation *ts = ipcp_transformation_sum->get_create (node); |
vec_safe_reserve_exact (ts->m_vr, count);
| 6292 | |
| 6293 | for (unsigned i = 0; i < count; i++) |
| 6294 | { |
| 6295 | ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); |
| 6296 | ipcp_bits_lattice *bits = NULL; |
| 6297 | |
| 6298 | if (do_bits |
| 6299 | && plats->bits_lattice.constant_p () |
&& dbg_cnt (ipa_cp_bits))
bits = &plats->bits_lattice;

if (do_vr
&& !plats->m_value_range.bottom_p ()
&& !plats->m_value_range.top_p ()
&& dbg_cnt (ipa_cp_vr))
| 6307 | { |
| 6308 | if (bits) |
| 6309 | { |
| 6310 | value_range tmp = plats->m_value_range.m_vr; |
| 6311 | tree type = ipa_get_type (info, i); |
irange_bitmask bm (wide_int::from (bits->get_value (),
TYPE_PRECISION (type),
TYPE_SIGN (type)),
wide_int::from (bits->get_mask (),
TYPE_PRECISION (type),
TYPE_SIGN (type)));
tmp.update_bitmask (bm);
// Reflecting the bitmask on the ranges can sometimes
// produce an UNDEFINED value if the bitmask update
// was previously deferred. See PR 120048.
if (tmp.undefined_p ())
tmp.set_varying (type);
ipa_vr vr (tmp);
ts->m_vr->quick_push (vr);
| 6326 | } |
| 6327 | else |
| 6328 | { |
| 6329 | ipa_vr vr (plats->m_value_range.m_vr); |
ts->m_vr->quick_push (vr);
| 6331 | } |
| 6332 | } |
| 6333 | else if (bits) |
| 6334 | { |
| 6335 | tree type = ipa_get_type (info, i); |
| 6336 | value_range tmp; |
| 6337 | tmp.set_varying (type); |
irange_bitmask bm (wide_int::from (bits->get_value (),
TYPE_PRECISION (type),
TYPE_SIGN (type)),
wide_int::from (bits->get_mask (),
TYPE_PRECISION (type),
TYPE_SIGN (type)));
tmp.update_bitmask (bm);
// Reflecting the bitmask on the ranges can sometimes
// produce an UNDEFINED value if the bitmask update
// was previously deferred. See PR 120048.
if (tmp.undefined_p ())
tmp.set_varying (type);
ipa_vr vr (tmp);
ts->m_vr->quick_push (vr);
| 6352 | } |
| 6353 | else |
| 6354 | { |
| 6355 | ipa_vr vr; |
ts->m_vr->quick_push (vr);
| 6357 | } |
| 6358 | |
| 6359 | if (!dump_file || !bits) |
| 6360 | continue; |
| 6361 | |
| 6362 | if (!dumped_sth) |
| 6363 | { |
fprintf (dump_file, "Propagated bits info for function %s:\n",
node->dump_name ());
dumped_sth = true;
}
fprintf (dump_file, " param %i: value = ", i);
ipcp_print_widest_int (dump_file, bits->get_value ());
fprintf (dump_file, ", mask = ");
ipcp_print_widest_int (dump_file, bits->get_mask ());
fprintf (dump_file, "\n");
| 6373 | } |
| 6374 | } |
| 6375 | } |
| 6376 | |
| 6377 | /* The IPCP driver. */ |
| 6378 | |
| 6379 | static unsigned int |
| 6380 | ipcp_driver (void) |
| 6381 | { |
| 6382 | class ipa_topo_info topo; |
| 6383 | |
| 6384 | if (edge_clone_summaries == NULL) |
| 6385 | edge_clone_summaries = new edge_clone_summary_t (symtab); |
| 6386 | |
| 6387 | ipa_check_create_node_params (); |
| 6388 | ipa_check_create_edge_args (); |
| 6389 | clone_num_suffixes = new hash_map<const char *, unsigned>; |
| 6390 | |
| 6391 | if (dump_file) |
| 6392 | { |
fprintf (dump_file, "\nIPA structures before propagation:\n");
if (dump_flags & TDF_DETAILS)
ipa_print_all_params (dump_file);
ipa_print_all_jump_functions (dump_file);
| 6397 | } |
| 6398 | |
| 6399 | /* Topological sort. */ |
build_toporder_info (&topo);
/* Do the interprocedural propagation. */
ipcp_propagate_stage (&topo);
/* Decide what constant propagation and cloning should be performed. */
ipcp_decision_stage (&topo);
/* Store results of value range and bits propagation. */
ipcp_store_vr_results ();
| 6407 | |
| 6408 | /* Free all IPCP structures. */ |
| 6409 | delete clone_num_suffixes; |
free_toporder_info (&topo);
delete edge_clone_summaries;
edge_clone_summaries = NULL;
ipa_free_all_structures_after_ipa_cp ();
if (dump_file)
fprintf (dump_file, "\nIPA constant propagation end\n");
| 6416 | return 0; |
| 6417 | } |
| 6418 | |
| 6419 | /* Initialization and computation of IPCP data structures. This is the initial |
| 6420 | intraprocedural analysis of functions, which gathers information to be |
| 6421 | propagated later on. */ |
| 6422 | |
| 6423 | static void |
| 6424 | ipcp_generate_summary (void) |
| 6425 | { |
| 6426 | struct cgraph_node *node; |
| 6427 | |
| 6428 | if (dump_file) |
fprintf (dump_file, "\nIPA constant propagation start:\n");
| 6430 | ipa_register_cgraph_hooks (); |
| 6431 | |
| 6432 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| 6433 | ipa_analyze_node (node); |
| 6434 | } |
| 6435 | |
| 6436 | namespace { |
| 6437 | |
| 6438 | const pass_data pass_data_ipa_cp = |
| 6439 | { |
IPA_PASS, /* type */
"cp", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_IPA_CONSTANT_PROP, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
| 6449 | }; |
| 6450 | |
| 6451 | class pass_ipa_cp : public ipa_opt_pass_d |
| 6452 | { |
| 6453 | public: |
| 6454 | pass_ipa_cp (gcc::context *ctxt) |
| 6455 | : ipa_opt_pass_d (pass_data_ipa_cp, ctxt, |
| 6456 | ipcp_generate_summary, /* generate_summary */ |
| 6457 | NULL, /* write_summary */ |
| 6458 | NULL, /* read_summary */ |
| 6459 | ipcp_write_transformation_summaries, /* |
| 6460 | write_optimization_summary */ |
| 6461 | ipcp_read_transformation_summaries, /* |
| 6462 | read_optimization_summary */ |
| 6463 | NULL, /* stmt_fixup */ |
| 6464 | 0, /* function_transform_todo_flags_start */ |
| 6465 | ipcp_transform_function, /* function_transform */ |
| 6466 | NULL) /* variable_transform */ |
| 6467 | {} |
| 6468 | |
| 6469 | /* opt_pass methods: */ |
| 6470 | bool gate (function *) final override |
| 6471 | { |
| 6472 | /* FIXME: We should remove the optimize check after we ensure we never run |
| 6473 | IPA passes when not optimizing. */ |
| 6474 | return (flag_ipa_cp && optimize) || in_lto_p; |
| 6475 | } |
| 6476 | |
| 6477 | unsigned int execute (function *) final override { return ipcp_driver (); } |
| 6478 | |
| 6479 | }; // class pass_ipa_cp |
| 6480 | |
| 6481 | } // anon namespace |
| 6482 | |
| 6483 | ipa_opt_pass_d * |
| 6484 | make_pass_ipa_cp (gcc::context *ctxt) |
| 6485 | { |
| 6486 | return new pass_ipa_cp (ctxt); |
| 6487 | } |
| 6488 | |
| 6489 | /* Reset all state within ipa-cp.cc so that we can rerun the compiler |
| 6490 | within the same process. For use by toplev::finalize. */ |
| 6491 | |
| 6492 | void |
| 6493 | ipa_cp_cc_finalize (void) |
| 6494 | { |
| 6495 | overall_size = 0; |
| 6496 | orig_overall_size = 0; |
| 6497 | ipcp_free_transformation_sum (); |
| 6498 | } |
| 6499 | |
| 6500 | /* Given PARAM which must be a parameter of function FNDECL described by THIS, |
| 6501 | return its index in the DECL_ARGUMENTS chain, using a pre-computed |
| 6502 | DECL_UID-sorted vector if available (which is pre-computed only if there are |
many parameters). Can return -1 if PARAM is the static chain, which is not
represented among DECL_ARGUMENTS. */
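
/* Sketch of the intended use (the caller code below is made up):

tree parm = ...;   // some PARM_DECL of FNDECL
int idx = ts->get_param_index (fndecl, parm);
// idx >= 0 is the zero-based position of PARM in DECL_ARGUMENTS,
// -1 means PARM is the static chain.

For functions with fewer than 32 parameters the linear walk below is used;
otherwise the DECL_UID-sorted vector built by maybe_create_parm_idx_map
allows a binary search. */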
| 6505 | |
| 6506 | int |
| 6507 | ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const |
| 6508 | { |
| 6509 | gcc_assert (TREE_CODE (param) == PARM_DECL); |
| 6510 | if (m_uid_to_idx) |
| 6511 | { |
| 6512 | unsigned puid = DECL_UID (param); |
| 6513 | const ipa_uid_to_idx_map_elt *res |
= std::lower_bound (m_uid_to_idx->begin (), m_uid_to_idx->end (), puid,
[] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
| 6516 | { |
| 6517 | return elt.uid < uid; |
| 6518 | }); |
| 6519 | if (res == m_uid_to_idx->end () |
| 6520 | || res->uid != puid) |
| 6521 | { |
| 6522 | gcc_assert (DECL_STATIC_CHAIN (fndecl)); |
| 6523 | return -1; |
| 6524 | } |
| 6525 | return res->index; |
| 6526 | } |
| 6527 | |
| 6528 | unsigned index = 0; |
| 6529 | for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++) |
| 6530 | if (p == param) |
| 6531 | return (int) index; |
| 6532 | |
| 6533 | gcc_assert (DECL_STATIC_CHAIN (fndecl)); |
| 6534 | return -1; |
| 6535 | } |
| 6536 | |
| 6537 | /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements |
| 6538 | according to the uid. */ |
| 6539 | |
| 6540 | static int |
| 6541 | compare_uids (const void *a, const void *b) |
| 6542 | { |
| 6543 | const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a; |
| 6544 | const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b; |
| 6545 | if (e1->uid < e2->uid) |
| 6546 | return -1; |
| 6547 | if (e1->uid > e2->uid) |
| 6548 | return 1; |
| 6549 | gcc_unreachable (); |
| 6550 | } |
| 6551 | |
| 6552 | /* Assuming THIS describes FNDECL and it has sufficiently many parameters to |
| 6553 | justify the overhead, create a DECL_UID-sorted vector to speed up mapping |
from parameters to their indices in the DECL_ARGUMENTS chain. */
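
/* E.g. for a (hypothetical) function with 40 parameters this builds a vector
of 40 (DECL_UID, index) pairs sorted by DECL_UID, which get_param_index can
then binary-search instead of walking the DECL_ARGUMENTS chain. */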
| 6555 | |
| 6556 | void |
| 6557 | ipcp_transformation::maybe_create_parm_idx_map (tree fndecl) |
| 6558 | { |
| 6559 | int c = count_formal_params (fndecl); |
| 6560 | if (c < 32) |
| 6561 | return; |
| 6562 | |
| 6563 | m_uid_to_idx = NULL; |
vec_safe_reserve (m_uid_to_idx, c, true);
| 6565 | unsigned index = 0; |
| 6566 | for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++) |
| 6567 | { |
| 6568 | ipa_uid_to_idx_map_elt elt; |
| 6569 | elt.uid = DECL_UID (p); |
| 6570 | elt.index = index; |
m_uid_to_idx->quick_push (elt);
| 6572 | } |
| 6573 | m_uid_to_idx->qsort (compare_uids); |
| 6574 | } |
| 6575 | |