/* Top-level LTO routines.
   Copyright (C) 2009-2023 Free Software Foundation, Inc.
   Contributed by CodeSourcery, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20
21#define INCLUDE_STRING
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "function.h"
27#include "bitmap.h"
28#include "basic-block.h"
29#include "tree.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "alloc-pool.h"
33#include "tree-pass.h"
34#include "tree-streamer.h"
35#include "cgraph.h"
36#include "opts.h"
37#include "toplev.h"
38#include "stor-layout.h"
39#include "symbol-summary.h"
40#include "tree-vrp.h"
41#include "ipa-prop.h"
42#include "debug.h"
43#include "lto.h"
44#include "lto-section-names.h"
45#include "splay-tree.h"
46#include "lto-partition.h"
47#include "context.h"
48#include "pass_manager.h"
49#include "ipa-fnsummary.h"
50#include "ipa-utils.h"
51#include "gomp-constants.h"
52#include "lto-symtab.h"
53#include "stringpool.h"
54#include "fold-const.h"
55#include "attribs.h"
56#include "builtins.h"
57#include "lto-common.h"
58#include "opts-jobserver.h"
59
60/* Number of parallel tasks to run. */
61static int lto_parallelism;
62
63/* Number of active WPA streaming processes. */
64static int nruns = 0;
65
66/* GNU make's jobserver info. */
67static jobserver_info *jinfo = NULL;
68
69/* Return true when NODE has a clone that is analyzed (i.e. we need
70 to load its body even if the node itself is not needed). */
71
72static bool
73has_analyzed_clone_p (struct cgraph_node *node)
74{
75 struct cgraph_node *orig = node;
76 node = node->clones;
77 if (node)
78 while (node != orig)
79 {
80 if (node->analyzed)
81 return true;
82 if (node->clones)
83 node = node->clones;
84 else if (node->next_sibling_clone)
85 node = node->next_sibling_clone;
86 else
87 {
88 while (node != orig && !node->next_sibling_clone)
89 node = node->clone_of;
90 if (node != orig)
91 node = node->next_sibling_clone;
92 }
93 }
94 return false;
95}
96
97/* Read the function body for the function associated with NODE. */
98
99static void
100lto_materialize_function (struct cgraph_node *node)
101{
102 tree decl;
103
104 decl = node->decl;
105 /* Read in functions with body (analyzed nodes)
106 and also functions that are needed to produce virtual clones. */
107 if ((node->has_gimple_body_p () && node->analyzed)
108 || node->used_as_abstract_origin
109 || has_analyzed_clone_p (node))
110 {
111 /* Clones don't need to be read. */
112 if (node->clone_of)
113 return;
114 if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
115 first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
116 /* If the file contains a function with a language specific EH
117 personality set or with EH enabled initialize the backend EH
118 machinery. */
119 if (DECL_FUNCTION_PERSONALITY (decl)
120 || opt_for_fn (decl, flag_exceptions))
121 lto_init_eh ();
122 }
123
124 /* Let the middle end know about the function. */
125 rest_of_decl_compilation (decl, 1, 0);
126}
127
128/* Materialize all the bodies for all the nodes in the callgraph. */
129
130static void
131materialize_cgraph (void)
132{
133 struct cgraph_node *node;
134 timevar_id_t lto_timer;
135
136 if (!quiet_flag)
137 fprintf (stderr,
138 flag_wpa ? "Materializing decls:" : "Reading function bodies:");
139
140 /* Start the appropriate timer depending on the mode that we are
141 operating in. */
142 lto_timer = (flag_wpa) ? TV_WHOPR_WPA
143 : (flag_ltrans) ? TV_WHOPR_LTRANS
144 : TV_LTO;
145 timevar_push (tv: lto_timer);
146
147 FOR_EACH_FUNCTION (node)
148 {
149 if (node->lto_file_data)
150 {
151 lto_materialize_function (node);
152 lto_stats.num_input_cgraph_nodes++;
153 }
154 }
155
156 current_function_decl = NULL;
157 set_cfun (NULL);
158
159 if (!quiet_flag)
160 fprintf (stderr, format: "\n");
161
162 timevar_pop (tv: lto_timer);
163}
164
165/* Actually stream out ENCODER into TEMP_FILENAME. */
166
167static void
168stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
169{
170 lto_file *file = lto_obj_file_open (filename: temp_filename, writable: true);
171 if (!file)
172 fatal_error (input_location, "%<lto_obj_file_open()%> failed");
173 lto_set_current_out_file (file);
174
175 gcc_assert (!dump_file);
176 streamer_dump_file = dump_begin (TDI_lto_stream_out, NULL, part);
177 ipa_write_optimization_summaries (encoder);
178
179 free (CONST_CAST (char *, file->filename));
180
181 lto_set_current_out_file (NULL);
182 lto_obj_file_close (file);
183 free (ptr: file);
184 if (streamer_dump_file)
185 {
186 dump_end (TDI_lto_stream_out, streamer_dump_file);
187 streamer_dump_file = NULL;
188 }
189}
190
/* Wait until one forked streaming process has terminated.  It is a
   fatal error if waitpid fails, if the child exits with a nonzero
   status or if it is killed by a signal.  Stop/continue notifications
   are skipped.  On a clean exit NRUNS is decremented and, when the
   jobserver is connected, one token is returned to it.  */
#ifdef HAVE_WORKING_FORK
static void
wait_for_child ()
{
  int status;
  do
    {
#ifndef WCONTINUED
#define WCONTINUED 0
#endif
      int w = waitpid (0, &status, WUNTRACED | WCONTINUED);
      if (w == -1)
        fatal_error (input_location, "waitpid failed");

      if (WIFEXITED (status) && WEXITSTATUS (status))
        fatal_error (input_location, "streaming subprocess failed");
      else if (WIFSIGNALED (status))
        fatal_error (input_location,
                     "streaming subprocess was killed by signal");
    }
  while (!WIFEXITED (status) && !WIFSIGNALED (status));

  --nruns;

  /* Return token to the jobserver if active.  */
  if (jinfo != NULL && jinfo->is_connected)
    jinfo->return_token ();
}
#endif
221
222static void
223stream_out_partitions_1 (char *temp_filename, int blen, int min, int max)
224{
225 /* Write all the nodes in SET. */
226 for (int p = min; p < max; p ++)
227 {
228 sprintf (s: temp_filename + blen, format: "%u.o", p);
229 stream_out (temp_filename, encoder: ltrans_partitions[p]->encoder, part: p);
230 ltrans_partitions[p]->encoder = NULL;
231 }
232}
233
234/* Stream out ENCODER into TEMP_FILENAME
235 Fork if that seems to help. */
236
237static void
238stream_out_partitions (char *temp_filename, int blen, int min, int max,
239 bool ARG_UNUSED (last))
240{
241#ifdef HAVE_WORKING_FORK
242 if (lto_parallelism <= 1)
243 {
244 stream_out_partitions_1 (temp_filename, blen, min, max);
245 return;
246 }
247
248 if (lto_parallelism > 0 && nruns >= lto_parallelism)
249 wait_for_child ();
250
251 /* If this is not the last parallel partition, execute new
252 streaming process. */
253 if (!last)
254 {
255 if (jinfo != NULL && jinfo->is_connected)
256 while (true)
257 {
258 if (jinfo->get_token ())
259 break;
260 if (nruns > 0)
261 wait_for_child ();
262 else
263 {
264 /* There are no free tokens, lets do the job outselves. */
265 stream_out_partitions_1 (temp_filename, blen, min, max);
266 asm_nodes_output = true;
267 return;
268 }
269 }
270
271 pid_t cpid = fork ();
272
273 if (!cpid)
274 {
275 setproctitle ("lto1-wpa-streaming");
276 stream_out_partitions_1 (temp_filename, blen, min, max);
277 exit (status: 0);
278 }
279 /* Fork failed; lets do the job ourseleves. */
280 else if (cpid == -1)
281 stream_out_partitions_1 (temp_filename, blen, min, max);
282 else
283 nruns++;
284 }
285 /* Last partition; stream it and wait for all children to die. */
286 else
287 {
288 stream_out_partitions_1 (temp_filename, blen, min, max);
289 while (nruns > 0)
290 wait_for_child ();
291
292 if (jinfo != NULL && jinfo->is_connected)
293 jinfo->disconnect ();
294 }
295 asm_nodes_output = true;
296#else
297 stream_out_partitions_1 (temp_filename, blen, min, max);
298#endif
299}
300
301/* Write all output files in WPA mode and the file with the list of
302 LTRANS units. */
303
304static void
305lto_wpa_write_files (void)
306{
307 unsigned i, n_sets;
308 ltrans_partition part;
309 FILE *ltrans_output_list_stream;
310 char *temp_filename;
311 auto_vec <char *>temp_filenames;
312 auto_vec <int>temp_priority;
313 size_t blen;
314
315 /* Open the LTRANS output list. */
316 if (!ltrans_output_list)
317 fatal_error (input_location, "no LTRANS output list filename provided");
318
319 timevar_push (tv: TV_WHOPR_WPA);
320
321 FOR_EACH_VEC_ELT (ltrans_partitions, i, part)
322 lto_stats.num_output_symtab_nodes
323 += lto_symtab_encoder_size (encoder: part->encoder);
324
325 timevar_pop (tv: TV_WHOPR_WPA);
326
327 timevar_push (tv: TV_WHOPR_WPA_IO);
328
329 cgraph_node *node;
330 /* Do body modifications needed for streaming before we fork out
331 worker processes. */
332 FOR_EACH_FUNCTION (node)
333 if (!node->clone_of && gimple_has_body_p (node->decl))
334 lto_prepare_function_for_streaming (node);
335
336 ggc_trim ();
337 report_heap_memory_use ();
338
339 /* Generate a prefix for the LTRANS unit files. */
340 blen = strlen (ltrans_output_list);
341 temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
342 strcpy (dest: temp_filename, ltrans_output_list);
343 if (blen > sizeof (".out")
344 && strcmp (s1: temp_filename + blen - sizeof (".out") + 1,
345 s2: ".out") == 0)
346 temp_filename[blen - sizeof (".out") + 1] = '\0';
347 blen = strlen (s: temp_filename);
348
349 n_sets = ltrans_partitions.length ();
350 unsigned sets_per_worker = n_sets;
351 if (lto_parallelism > 1)
352 {
353 if (lto_parallelism > (int)n_sets)
354 lto_parallelism = n_sets;
355 sets_per_worker = (n_sets + lto_parallelism - 1) / lto_parallelism;
356 }
357
358 for (i = 0; i < n_sets; i++)
359 {
360 ltrans_partition part = ltrans_partitions[i];
361
362 /* Write all the nodes in SET. */
363 sprintf (s: temp_filename + blen, format: "%u.o", i);
364
365 if (!quiet_flag)
366 fprintf (stderr, format: " %s (%s %i insns)", temp_filename, part->name,
367 part->insns);
368 if (symtab->dump_file)
369 {
370 lto_symtab_encoder_iterator lsei;
371
372 fprintf (stream: symtab->dump_file,
373 format: "Writing partition %s to file %s, %i insns\n",
374 part->name, temp_filename, part->insns);
375 fprintf (stream: symtab->dump_file, format: " Symbols in partition: ");
376 for (lsei = lsei_start_in_partition (encoder: part->encoder);
377 !lsei_end_p (lsei);
378 lsei_next_in_partition (lsei: &lsei))
379 {
380 symtab_node *node = lsei_node (lsei);
381 fprintf (stream: symtab->dump_file, format: "%s ", node->dump_asm_name ());
382 }
383 fprintf (stream: symtab->dump_file, format: "\n Symbols in boundary: ");
384 for (lsei = lsei_start (encoder: part->encoder); !lsei_end_p (lsei);
385 lsei_next (lsei: &lsei))
386 {
387 symtab_node *node = lsei_node (lsei);
388 if (!lto_symtab_encoder_in_partition_p (part->encoder, node))
389 {
390 fprintf (stream: symtab->dump_file, format: "%s ", node->dump_asm_name ());
391 cgraph_node *cnode = dyn_cast <cgraph_node *> (p: node);
392 if (cnode
393 && lto_symtab_encoder_encode_body_p (part->encoder,
394 cnode))
395 fprintf (stream: symtab->dump_file, format: "(body included)");
396 else
397 {
398 varpool_node *vnode = dyn_cast <varpool_node *> (p: node);
399 if (vnode
400 && lto_symtab_encoder_encode_initializer_p (part->encoder,
401 vnode))
402 fprintf (stream: symtab->dump_file, format: "(initializer included)");
403 }
404 }
405 }
406 fprintf (stream: symtab->dump_file, format: "\n");
407 }
408 gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
409
410 temp_priority.safe_push (obj: part->insns);
411 temp_filenames.safe_push (obj: xstrdup (temp_filename));
412 }
413 memory_block_pool::trim (nblocks: 0);
414
415 for (int set = 0; set < MAX (lto_parallelism, 1); set++)
416 {
417 stream_out_partitions (temp_filename, blen, min: set * sets_per_worker,
418 MIN ((set + 1) * sets_per_worker, n_sets),
419 last: set == MAX (lto_parallelism, 1) - 1);
420 }
421
422 ltrans_output_list_stream = fopen (ltrans_output_list, modes: "w");
423 if (ltrans_output_list_stream == NULL)
424 fatal_error (input_location,
425 "opening LTRANS output list %s: %m", ltrans_output_list);
426 for (i = 0; i < n_sets; i++)
427 {
428 unsigned int len = strlen (s: temp_filenames[i]);
429 if (fprintf (stream: ltrans_output_list_stream, format: "%i\n", temp_priority[i]) < 0
430 || fwrite (ptr: temp_filenames[i], size: 1, n: len, s: ltrans_output_list_stream) < len
431 || fwrite (ptr: "\n", size: 1, n: 1, s: ltrans_output_list_stream) < 1)
432 fatal_error (input_location, "writing to LTRANS output list %s: %m",
433 ltrans_output_list);
434 free (ptr: temp_filenames[i]);
435 }
436
437 lto_stats.num_output_files += n_sets;
438
439 /* Close the LTRANS output list. */
440 if (fclose (stream: ltrans_output_list_stream))
441 fatal_error (input_location,
442 "closing LTRANS output list %s: %m", ltrans_output_list);
443
444 free_ltrans_partitions ();
445 free (ptr: temp_filename);
446
447 timevar_pop (tv: TV_WHOPR_WPA_IO);
448}
449
/* For every variable carrying the "omp declare target link" attribute
   create an artificial static pointer variable and make the original
   variable's value expression a dereference of that pointer.  Does
   nothing unless compiled as an accelerator compiler.  */

static void
offload_handle_link_vars (void)
{
#ifdef ACCEL_COMPILER
  varpool_node *var;
  FOR_EACH_VARIABLE (var)
    {
      tree decl = var->decl;

      if (!lookup_attribute ("omp declare target link",
                             DECL_ATTRIBUTES (decl)))
        continue;

      /* Build the pointer variable, named after DECL with a "linkptr"
         suffix.  */
      tree ptr_type = build_pointer_type (TREE_TYPE (decl));
      tree ptr_name = clone_function_name (decl, "linkptr");
      tree link_ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                      ptr_name, ptr_type);
      TREE_USED (link_ptr_var) = 1;
      TREE_STATIC (link_ptr_var) = 1;
      TREE_PUBLIC (link_ptr_var) = TREE_PUBLIC (decl);
      DECL_ARTIFICIAL (link_ptr_var) = 1;
      SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var));

      /* Accesses to DECL now go through the pointer.  */
      SET_DECL_VALUE_EXPR (decl, build_simple_mem_ref (link_ptr_var));
      DECL_HAS_VALUE_EXPR_P (decl) = 1;
    }
#endif
}
475
476/* Perform whole program analysis (WPA) on the callgraph and write out the
477 optimization plan. */
478
479static void
480do_whole_program_analysis (void)
481{
482 symtab_node *node;
483
484 lto_parallelism = 1;
485
486 if (!strcmp (flag_wpa, s2: "jobserver"))
487 {
488 jinfo = new jobserver_info ();
489 if (jinfo->is_active)
490 jinfo->connect ();
491
492 lto_parallelism = param_max_lto_streaming_parallelism;
493 }
494 else
495 {
496 lto_parallelism = atoi (flag_wpa);
497 if (lto_parallelism <= 0)
498 lto_parallelism = 0;
499 if (lto_parallelism >= param_max_lto_streaming_parallelism)
500 lto_parallelism = param_max_lto_streaming_parallelism;
501 }
502
503 timevar_start (TV_PHASE_OPT_GEN);
504
505 /* Note that since we are in WPA mode, materialize_cgraph will not
506 actually read in all the function bodies. It only materializes
507 the decls and cgraph nodes so that analysis can be performed. */
508 materialize_cgraph ();
509
510 /* Reading in the cgraph uses different timers, start timing WPA now. */
511 timevar_push (tv: TV_WHOPR_WPA);
512
513 if (pre_ipa_mem_report)
514 dump_memory_report ("Memory consumption before IPA");
515
516 symtab->function_flags_ready = true;
517
518 if (symtab->dump_file)
519 symtab->dump (f: symtab->dump_file);
520 bitmap_obstack_initialize (NULL);
521 symtab->state = IPA_SSA;
522
523 execute_ipa_pass_list (g->get_passes ()->all_regular_ipa_passes);
524
525 /* When WPA analysis raises errors, do not bother to output anything. */
526 if (seen_error ())
527 return;
528
529 /* We are about to launch the final LTRANS phase, stop the WPA timer. */
530 timevar_pop (tv: TV_WHOPR_WPA);
531
532 /* We are no longer going to stream in anything. Free some memory. */
533 lto_free_file_name_hash ();
534
535
536 timevar_push (tv: TV_WHOPR_PARTITIONING);
537
538 gcc_assert (!dump_file);
539 dump_file = dump_begin (partition_dump_id, NULL);
540
541 if (dump_file)
542 symtab->dump (f: dump_file);
543
544 symtab_node::checking_verify_symtab_nodes ();
545 bitmap_obstack_release (NULL);
546 if (flag_lto_partition == LTO_PARTITION_1TO1)
547 lto_1_to_1_map ();
548 else if (flag_lto_partition == LTO_PARTITION_MAX)
549 lto_max_map ();
550 else if (flag_lto_partition == LTO_PARTITION_ONE)
551 lto_balanced_map (1, INT_MAX);
552 else if (flag_lto_partition == LTO_PARTITION_BALANCED)
553 lto_balanced_map (param_lto_partitions,
554 param_max_partition_size);
555 else
556 gcc_unreachable ();
557
558 /* Size summaries are needed for balanced partitioning. Free them now so
559 the memory can be used for streamer caches. */
560 ipa_free_size_summary ();
561
562 /* AUX pointers are used by partitioning code to bookkeep number of
563 partitions symbol is in. This is no longer needed. */
564 FOR_EACH_SYMBOL (node)
565 node->aux = NULL;
566
567 lto_stats.num_cgraph_partitions += ltrans_partitions.length ();
568
569 /* Find out statics that need to be promoted
570 to globals with hidden visibility because they are accessed from multiple
571 partitions. */
572 lto_promote_cross_file_statics ();
573 offload_handle_link_vars ();
574 if (dump_file)
575 dump_end (partition_dump_id, dump_file);
576 dump_file = NULL;
577 timevar_pop (tv: TV_WHOPR_PARTITIONING);
578
579 timevar_stop (TV_PHASE_OPT_GEN);
580
581 /* Collect a last time - in lto_wpa_write_files we may end up forking
582 with the idea that this doesn't increase memory usage. So we
583 absoultely do not want to collect after that. */
584 ggc_collect ();
585
586 timevar_start (TV_PHASE_STREAM_OUT);
587 if (!quiet_flag)
588 {
589 fprintf (stderr, format: "\nStreaming out");
590 fflush (stderr);
591 }
592 lto_wpa_write_files ();
593 if (!quiet_flag)
594 fprintf (stderr, format: "\n");
595 timevar_stop (TV_PHASE_STREAM_OUT);
596
597 if (post_ipa_mem_report)
598 dump_memory_report ("Memory consumption after IPA");
599
600 /* Show the LTO report before launching LTRANS. */
601 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
602 print_lto_report_1 ();
603 if (mem_report_wpa)
604 dump_memory_report ("Final");
605}
606
607unsigned int
608lto_option_lang_mask (void)
609{
610 return CL_LTO;
611}
612
613/* Main entry point for the GIMPLE front end. This front end has
614 three main personalities:
615
616 - LTO (-flto). All the object files on the command line are
617 loaded in memory and processed as a single translation unit.
618 This is the traditional link-time optimization behavior.
619
620 - WPA (-fwpa). Only the callgraph and summary information for
621 files in the command file are loaded. A single callgraph
622 (without function bodies) is instantiated for the whole set of
623 files. IPA passes are only allowed to analyze the call graph
624 and make transformation decisions. The callgraph is
625 partitioned, each partition is written to a new object file
626 together with the transformation decisions.
627
628 - LTRANS (-fltrans). Similar to -flto but it prevents the IPA
629 summary files from running again. Since WPA computed summary
630 information and decided what transformations to apply, LTRANS
631 simply applies them. */
632
633void
634lto_main (void)
635{
636 /* LTO is called as a front end, even though it is not a front end.
637 Because it is called as a front end, TV_PHASE_PARSING and
638 TV_PARSE_GLOBAL are active, and we need to turn them off while
639 doing LTO. Later we turn them back on so they are active up in
640 toplev.cc. */
641 timevar_pop (tv: TV_PARSE_GLOBAL);
642 timevar_stop (TV_PHASE_PARSING);
643
644 timevar_start (TV_PHASE_SETUP);
645
646 /* Initialize the LTO front end. */
647 lto_fe_init ();
648
649 timevar_stop (TV_PHASE_SETUP);
650 timevar_start (TV_PHASE_STREAM_IN);
651
652 /* Read all the symbols and call graph from all the files in the
653 command line. */
654 read_cgraph_and_symbols (num_in_fnames, in_fnames);
655
656 timevar_stop (TV_PHASE_STREAM_IN);
657
658 if (!seen_error ())
659 {
660 offload_handle_link_vars ();
661
662 /* If WPA is enabled analyze the whole call graph and create an
663 optimization plan. Otherwise, read in all the function
664 bodies and continue with optimization. */
665 if (flag_wpa)
666 do_whole_program_analysis ();
667 else
668 {
669 timevar_start (TV_PHASE_OPT_GEN);
670
671 materialize_cgraph ();
672 if (!flag_ltrans)
673 {
674 lto_promote_statics_nonwpa ();
675 offload_handle_link_vars ();
676 }
677
678 /* Annotate the CU DIE and mark the early debug phase as finished. */
679 debuginfo_early_start ();
680 debug_hooks->early_finish ("<artificial>");
681 debuginfo_early_stop ();
682
683 /* Let the middle end know that we have read and merged all of
684 the input files. */
685 symtab->compile ();
686
687 timevar_stop (TV_PHASE_OPT_GEN);
688
689 /* FIXME lto, if the processes spawned by WPA fail, we miss
690 the chance to print WPA's report, so WPA will call
691 print_lto_report before launching LTRANS. If LTRANS was
692 launched directly by the driver we would not need to do
693 this. */
694 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
695 print_lto_report_1 ();
696 }
697 }
698
699 /* Here we make LTO pretend to be a parser. */
700 timevar_start (TV_PHASE_PARSING);
701 timevar_push (tv: TV_PARSE_GLOBAL);
702}
703

source code of gcc/lto/lto.cc