1// Pass to fuse adjacent loads/stores into paired memory accesses.
2// Copyright (C) 2023-2025 Free Software Foundation, Inc.
3//
4// This file is part of GCC.
5//
6// GCC is free software; you can redistribute it and/or modify it
7// under the terms of the GNU General Public License as published by
8// the Free Software Foundation; either version 3, or (at your option)
9// any later version.
10//
11// GCC is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public License
17// along with GCC; see the file COPYING3. If not see
18// <http://www.gnu.org/licenses/>.
19
20#define INCLUDE_ALGORITHM
21#define INCLUDE_FUNCTIONAL
22#define INCLUDE_LIST
23#define INCLUDE_TYPE_TRAITS
24#define INCLUDE_ARRAY
25#include "config.h"
26#include "system.h"
27#include "coretypes.h"
28#include "backend.h"
29#include "rtl.h"
30#include "df.h"
31#include "rtl-iter.h"
32#include "rtl-ssa.h"
33#include "cfgcleanup.h"
34#include "tree-pass.h"
35#include "ordered-hash-map.h"
36#include "tree-dfa.h"
37#include "fold-const.h"
38#include "tree-hash-traits.h"
39#include "print-tree.h"
40#include "pair-fusion.h"
41
42using namespace rtl_ssa;
43
44// We pack these fields (load_p, fpsimd_p, and size) into an integer
45// (LFS) which we use as part of the key into the main hash tables.
46//
47// The idea is that we group candidates together only if they agree on
48// the fields below. Candidates that disagree on any of these
49// properties shouldn't be merged together.
50struct lfs_fields
51{
52 bool load_p;
53 bool fpsimd_p;
54 unsigned size;
55};
56
57using insn_list_t = std::list<insn_info *>;
58
59// Information about the accesses at a given offset from a particular
60// base. Stored in an access_group, see below.
61struct access_record
62{
63 poly_int64 offset;
64 std::list<insn_info *> cand_insns;
65 std::list<access_record>::iterator place;
66
67 access_record (poly_int64 off) : offset (off) {}
68};
69
70// A group of accesses where adjacent accesses could be ldp/stp
71// candidates. The splay tree supports efficient insertion,
72// while the list supports efficient iteration.
73struct access_group
74{
75 splay_tree<access_record *> tree;
76 std::list<access_record> list;
77
78 template<typename Alloc>
79 inline void track (Alloc node_alloc, poly_int64 offset, insn_info *insn);
80};
81
82// Test if this base candidate is viable according to HAZARDS.
83bool
84base_cand::viable () const
85{
86 return !hazards[0] || !hazards[1] || (*hazards[0] > *hazards[1]);
87}
88
89// Information about an alternate base. For a def_info D, it may
90// instead be expressed as D = BASE + OFFSET.
91struct alt_base
92{
93 def_info *base;
94 poly_int64 offset;
95};
96
97// Virtual base class for load/store walkers used in alias analysis.
98struct alias_walker
99{
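  // Return true if the access(es) this walker currently points at may
  // alias the candidate pair.  BUDGET limits how much alias-analysis work
  // we are willing to do; walkers charge the work they perform against it.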
100 virtual bool conflict_p (int &budget) const = 0;
101 virtual insn_info *insn () const = 0;
102 virtual bool valid () const = 0;
103 virtual void advance () = 0;
104};
105
106
107pair_fusion::pair_fusion ()
108{
109 calculate_dominance_info (CDI_DOMINATORS);
110 df_analyze ();
111 crtl->ssa = new rtl_ssa::function_info (cfun);
112}
113
114pair_fusion::~pair_fusion ()
115{
116 if (crtl->ssa->perform_pending_updates ())
117 cleanup_cfg (0);
118
119 free_dominance_info (CDI_DOMINATORS);
120
121 delete crtl->ssa;
122 crtl->ssa = nullptr;
123}
124
125// This is the main function to start the pass.
126void
127pair_fusion::run ()
128{
129 if (!track_loads_p () && !track_stores_p ())
130 return;
131
132 for (auto bb : crtl->ssa->bbs ())
133 process_block (bb);
134}
135
136// State used by the pass for a given basic block.
137struct pair_fusion_bb_info
138{
139 using def_hash = nofree_ptr_hash<def_info>;
140 using expr_key_t = pair_hash<tree_operand_hash, int_hash<int, -1, -2>>;
141 using def_key_t = pair_hash<def_hash, int_hash<int, -1, -2>>;
142
143 // Map of <tree base, LFS> -> access_group.
144 ordered_hash_map<expr_key_t, access_group> expr_map;
145
146 // Map of <RTL-SSA def_info *, LFS> -> access_group.
147 ordered_hash_map<def_key_t, access_group> def_map;
148
149 // Given the def_info for an RTL base register, express it as an offset from
150 // some canonical base instead.
151 //
152 // Canonicalizing bases in this way allows us to identify adjacent accesses
153 // even if they see different base register defs.
154 hash_map<def_hash, alt_base> canon_base_map;
155
156 static const size_t obstack_alignment = sizeof (void *);
157
158 pair_fusion_bb_info (bb_info *bb, pair_fusion *d)
159 : m_bb (bb), m_pass (d), m_emitted_tombstone (false)
160 {
161 obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
162 obstack_alignment, obstack_chunk_alloc,
163 obstack_chunk_free);
164 }
165 ~pair_fusion_bb_info ()
166 {
167 obstack_free (&m_obstack, nullptr);
168
    if (m_emitted_tombstone)
      {
        bitmap_release (&m_tombstone_bitmap);
        bitmap_obstack_release (&m_bitmap_obstack);
      }
174 }
175
176 inline void track_access (insn_info *, bool load, rtx mem);
177 inline void transform ();
178 inline void cleanup_tombstones ();
179
180private:
181 obstack m_obstack;
182 bb_info *m_bb;
183 pair_fusion *m_pass;
184
185 // State for keeping track of tombstone insns emitted for this BB.
186 bitmap_obstack m_bitmap_obstack;
187 bitmap_head m_tombstone_bitmap;
188 bool m_emitted_tombstone;
189
190 inline splay_tree_node<access_record *> *node_alloc (access_record *);
191
192 template<typename Map>
193 inline void traverse_base_map (Map &map);
194 inline void transform_for_base (int load_size, access_group &group);
195
196 inline void merge_pairs (insn_list_t &, insn_list_t &,
197 bool load_p, unsigned access_size);
198
199 inline bool try_fuse_pair (bool load_p, unsigned access_size,
200 insn_info *i1, insn_info *i2);
201
202 inline bool fuse_pair (bool load_p, unsigned access_size,
203 int writeback,
204 insn_info *i1, insn_info *i2,
205 base_cand &base,
206 const insn_range_info &move_range);
207
208 inline void track_tombstone (int uid);
209
210 inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
211};
212
213splay_tree_node<access_record *> *
214pair_fusion_bb_info::node_alloc (access_record *access)
215{
216 using T = splay_tree_node<access_record *>;
217 void *addr = obstack_alloc (&m_obstack, sizeof (T));
218 return new (addr) T (access);
219}
220
221// Given a mem MEM, if the address has side effects, return a MEM that accesses
222// the same address but without the side effects. Otherwise, return
223// MEM unchanged.
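//
// For example, a 4-byte access (mem:SI (post_inc:DI (reg:DI r0))) becomes
// (mem:SI (reg:DI r0)), while (mem:SI (pre_inc:DI (reg:DI r0))) becomes
// (mem:SI (plus:DI (reg:DI r0) (const_int 4))), i.e. the address the access
// actually uses once the side effect is stripped.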
224static rtx
225drop_writeback (rtx mem)
226{
227 rtx addr = XEXP (mem, 0);
228
229 if (!side_effects_p (addr))
230 return mem;
231
232 switch (GET_CODE (addr))
233 {
234 case PRE_MODIFY:
235 addr = XEXP (addr, 1);
236 break;
237 case POST_MODIFY:
238 case POST_INC:
239 case POST_DEC:
240 addr = XEXP (addr, 0);
241 break;
242 case PRE_INC:
243 case PRE_DEC:
244 {
245 poly_int64 adjustment = GET_MODE_SIZE (GET_MODE (mem));
246 if (GET_CODE (addr) == PRE_DEC)
247 adjustment *= -1;
248 addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), adjustment);
249 break;
250 }
251 default:
252 gcc_unreachable ();
253 }
254
255 return change_address (mem, GET_MODE (mem), addr);
256}
257
258// Convenience wrapper around strip_offset that can also look through
259// RTX_AUTOINC addresses. The interface is like strip_offset except we take a
260// MEM so that we know the mode of the access.
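//
// For example, (mem:DI (plus:DI (reg:DI r1) (const_int 16))) gives base r1
// and offset 16, while (mem:DI (post_inc:DI (reg:DI r1))) gives base r1 and
// offset 8 (the size of the access).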
261static rtx
262pair_mem_strip_offset (rtx mem, poly_int64 *offset)
263{
264 rtx addr = XEXP (mem, 0);
265
266 switch (GET_CODE (addr))
267 {
268 case PRE_MODIFY:
269 case POST_MODIFY:
270 addr = strip_offset (XEXP (addr, 1), offset);
271 gcc_checking_assert (REG_P (addr));
272 gcc_checking_assert (rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr));
273 break;
274 case PRE_INC:
275 case POST_INC:
276 addr = XEXP (addr, 0);
277 *offset = GET_MODE_SIZE (GET_MODE (mem));
278 gcc_checking_assert (REG_P (addr));
279 break;
280 case PRE_DEC:
281 case POST_DEC:
282 addr = XEXP (addr, 0);
283 *offset = -GET_MODE_SIZE (GET_MODE (mem));
284 gcc_checking_assert (REG_P (addr));
285 break;
286
287 default:
288 addr = strip_offset (addr, offset);
289 }
290
291 return addr;
292}
293
294// Return true if X is a PRE_{INC,DEC,MODIFY} rtx.
295static bool
296any_pre_modify_p (rtx x)
297{
298 const auto code = GET_CODE (x);
299 return code == PRE_INC || code == PRE_DEC || code == PRE_MODIFY;
300}
301
302// Return true if X is a POST_{INC,DEC,MODIFY} rtx.
303static bool
304any_post_modify_p (rtx x)
305{
306 const auto code = GET_CODE (x);
307 return code == POST_INC || code == POST_DEC || code == POST_MODIFY;
308}
309
// Given LFS (load_p, fpsimd_p, size) fields in FIELDS, encode these
// into an integer for use as a hash table key.
static int
encode_lfs (lfs_fields fields)
{
  int size_log2 = exact_log2 (fields.size);
  gcc_checking_assert (size_log2 >= 2 && size_log2 <= 4);
  return ((int)fields.load_p << 3)
    | ((int)fields.fpsimd_p << 2)
    | (size_log2 - 2);
}

// Inverse of encode_lfs.
static lfs_fields
decode_lfs (int lfs)
{
  bool load_p = (lfs & (1 << 3));
  bool fpsimd_p = (lfs & (1 << 2));
  unsigned size = 1U << ((lfs & 3) + 2);
  return { load_p, fpsimd_p, size };
}
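
// For example, a 16-byte FP/SIMD load, i.e. lfs_fields { true, true, 16 },
// encodes to (1 << 3) | (1 << 2) | (log2 (16) - 2) == 0b1110, and
// decode_lfs (0b1110) recovers the same three fields.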

// Track the access INSN at offset OFFSET in this access group.
// ALLOC_NODE is used to allocate splay tree nodes.
template<typename Alloc>
void
access_group::track (Alloc alloc_node, poly_int64 offset, insn_info *insn)
{
  auto insert_before = [&](std::list<access_record>::iterator after)
    {
      auto it = list.emplace (after, offset);
      it->cand_insns.push_back (insn);
      it->place = it;
      return &*it;
    };

  if (!list.size ())
    {
      auto access = insert_before (list.end ());
      tree.insert_max_node (alloc_node (access));
      return;
    }

  auto compare = [&](splay_tree_node<access_record *> *node)
    {
      return compare_sizes_for_sort (offset, node->value ()->offset);
    };
  auto result = tree.lookup (compare);
  splay_tree_node<access_record *> *node = tree.root ();
  if (result == 0)
    node->value ()->cand_insns.push_back (insn);
  else
    {
      auto it = node->value ()->place;
      auto after = (result > 0) ? std::next (it) : it;
      auto access = insert_before (after);
      tree.insert_child (node, result > 0, alloc_node (access));
    }
}
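
// Note that if two accesses have the same offset, the later insn is simply
// appended to the existing access_record's cand_insns list (the result == 0
// case above) rather than creating a new record.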

// Given a candidate access INSN (with mem MEM), see if it has a suitable
// MEM_EXPR base (i.e. a tree decl) relative to which we can track the access.
// LFS is used as part of the key to the hash table, see track_access.
bool
pair_fusion_bb_info::track_via_mem_expr (insn_info *insn, rtx mem,
					 lfs_fields lfs)
{
  if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem))
    return false;

  poly_int64 offset;
  tree base_expr = get_addr_base_and_unit_offset (MEM_EXPR (mem),
						  &offset);
  if (!base_expr || !DECL_P (base_expr))
    return false;

  offset += MEM_OFFSET (mem);

  const machine_mode mem_mode = GET_MODE (mem);
  const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();

  // Punt on misaligned offsets.  Paired memory access instructions require
  // offsets to be a multiple of the access size, and we believe that
  // misaligned offsets on MEM_EXPR bases are likely to lead to misaligned
  // offsets w.r.t. RTL bases.
  if (!multiple_p (offset, mem_size))
    return false;

  const auto key = std::make_pair (base_expr, encode_lfs (lfs));
  access_group &group = expr_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, offset, insn);

  if (dump_file)
    {
      fprintf (dump_file, "[bb %u] tracking insn %d via ",
	       m_bb->index (), insn->uid ());
      print_node_brief (dump_file, "mem expr", base_expr, 0);
      fprintf (dump_file, " [L=%d FP=%d, %smode, off=",
	       lfs.load_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (offset, dump_file);
      fprintf (dump_file, "]\n");
    }

  return true;
}

// Main function to begin pair discovery.  Given a memory access INSN,
// determine whether it could be a candidate for fusing into a paired
// access, and if so, track it in the appropriate data structure for
// this basic block.  LOAD_P is true if the access is a load, and MEM
// is the mem rtx that occurs in INSN.
void
pair_fusion_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
{
  // We can't combine volatile MEMs, so punt on these.
  if (MEM_VOLATILE_P (mem))
    return;

  // Ignore writeback accesses if the hook says to do so.
  if (!m_pass->should_handle_writeback (writeback_type::EXISTING)
      && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    return;

  const machine_mode mem_mode = GET_MODE (mem);
  if (!m_pass->pair_operand_mode_ok_p (mem_mode))
    return;

  rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);

  if (!m_pass->pair_reg_operand_ok_p (load_p, reg_op, mem_mode))
    return;

  // We want to segregate FP/SIMD accesses from GPR accesses.
  const bool fpsimd_op_p = m_pass->fpsimd_op_p (reg_op, mem_mode, load_p);

  // Note pair_operand_mode_ok_p already rejected VL modes.
  const unsigned mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
  const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };

  if (track_via_mem_expr (insn, mem, lfs))
    return;

  poly_int64 mem_off;
  rtx addr = XEXP (mem, 0);
  const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;
  rtx base = pair_mem_strip_offset (mem, &mem_off);
  if (!REG_P (base))
    return;

  // Need to calculate two (possibly different) offsets:
  // - Offset at which the access occurs.
  // - Offset of the new base def.
  poly_int64 access_off;
  if (autoinc_p && any_post_modify_p (addr))
    access_off = 0;
  else
    access_off = mem_off;

  poly_int64 new_def_off = mem_off;

  // Punt on accesses relative to eliminable regs.  Since we don't know the
  // elimination offset pre-RA, we should postpone forming pairs on such
  // accesses until after RA.
  //
  // As it stands, addresses in range for an individual load/store but not
  // for a paired access are currently reloaded inefficiently,
  // ending up with a separate base register for each pair.
  //
  // In theory LRA should make use of
  // targetm.legitimize_address_displacement to promote sharing of
  // bases among multiple (nearby) address reloads, but the current
  // LRA code returns early from process_address_1 for operands that
  // satisfy "m", even if they don't satisfy the real (relaxed) address
  // constraint; this early return means we never get to the code
  // that calls targetm.legitimize_address_displacement.
  //
  // So for now, it's better to punt when we can't be sure that the
  // offset is in range for paired access.  On aarch64, out-of-range cases
  // can then be handled after RA by the out-of-range LDP/STP peepholes.
  // Eventually, it would be nice to handle known out-of-range opportunities
  // in the pass itself (for stack accesses, this would be in the post-RA pass).
  if (!reload_completed
      && (REGNO (base) == FRAME_POINTER_REGNUM
	  || REGNO (base) == ARG_POINTER_REGNUM))
    return;

  // Now need to find def of base register.
  use_info *base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);
  def_info *base_def = base_use->def ();
  if (!base_def)
    {
      if (dump_file)
	fprintf (dump_file,
		 "base register (regno %d) of insn %d is undefined",
		 REGNO (base), insn->uid ());
      return;
    }

  alt_base *canon_base = canon_base_map.get (base_def);
  if (canon_base)
    {
      // Express this as the combined offset from the canonical base.
      base_def = canon_base->base;
      new_def_off += canon_base->offset;
      access_off += canon_base->offset;
    }

  if (autoinc_p)
    {
      auto def = find_access (insn->defs (), REGNO (base));
      gcc_assert (def);

      // Record that DEF = BASE_DEF + MEM_OFF.
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_access (&pp, def, 0);
	  pp_string (&pp, " = ");
	  pp_access (&pp, base_def, 0);
	  fprintf (dump_file, "[bb %u] recording %s + ",
		   m_bb->index (), pp_formatted_text (&pp));
	  print_dec (new_def_off, dump_file);
	  fprintf (dump_file, "\n");
	}

      alt_base base_rec { base_def, new_def_off };
      if (canon_base_map.put (def, base_rec))
	gcc_unreachable (); // Base defs should be unique.
    }

  // Punt on misaligned offsets.  Paired memory accesses require offsets
  // to be a multiple of the access size.
  if (!multiple_p (mem_off, mem_size))
    return;

  const auto key = std::make_pair (base_def, encode_lfs (lfs));
  access_group &group = def_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, access_off, insn);

  if (dump_file)
    {
      pretty_printer pp;
      pp_access (&pp, base_def, 0);

      fprintf (dump_file, "[bb %u] tracking insn %d via %s",
	       m_bb->index (), insn->uid (), pp_formatted_text (&pp));
      fprintf (dump_file,
	       " [L=%d, WB=%d, FP=%d, %smode, off=",
	       lfs.load_p, autoinc_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (access_off, dump_file);
      fprintf (dump_file, "]\n");
    }
}
566
567// Return the latest dataflow hazard before INSN.
568//
569// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
570// dataflow purposes. This is needed when considering changing the RTL base of
571// an access discovered through a MEM_EXPR base.
572//
573// If IGNORE_INSN is non-NULL, we should further ignore any hazards arising
574// from that insn.
575//
576// IS_LOAD_STORE is true if INSN is one of the loads or stores in the
577// candidate pair. We ignore any defs/uses of memory in such instructions
578// as we deal with that separately, making use of alias disambiguation.
579static insn_info *
580latest_hazard_before (insn_info *insn, rtx *ignore,
581 insn_info *ignore_insn = nullptr,
582 bool is_load_store = true)
583{
584 insn_info *result = nullptr;
585
586 // If the insn can throw then it is at the end of a BB and we can't
587 // move it, model this by recording a hazard in the previous insn
588 // which will prevent moving the insn up.
589 if (cfun->can_throw_non_call_exceptions
590 && find_reg_note (insn->rtl (), REG_EH_REGION, NULL_RTX))
591 return insn->prev_nondebug_insn ();
592
  if (!is_load_store
      && accesses_include_memory (insn->defs ()))
    return insn->prev_nondebug_insn ();
596
597 // Return true if we registered the hazard.
598 auto hazard = [&](insn_info *h) -> bool
599 {
600 gcc_checking_assert (*h < *insn);
601 if (h == ignore_insn)
602 return false;
603
604 if (!result || *h > *result)
605 result = h;
606
607 return true;
608 };
609
  rtx pat = PATTERN (insn->rtl ());
611 auto ignore_use = [&](use_info *u)
612 {
613 if (u->is_mem ())
614 return true;
615
616 return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
617 };
618
619 // Find defs of uses in INSN (RaW).
620 for (auto use : insn->uses ())
621 if (!ignore_use (use) && use->def ())
622 hazard (use->def ()->insn ());
623
624 // Find previous defs (WaW) or previous uses (WaR) of defs in INSN.
625 for (auto def : insn->defs ())
626 {
627 if (def->is_mem ())
628 continue;
629
630 if (def->prev_def ())
631 {
632 hazard (def->prev_def ()->insn ()); // WaW
633
	  auto set = dyn_cast<set_info *> (def->prev_def ());
635 if (set && set->has_nondebug_insn_uses ())
636 for (auto use : set->reverse_nondebug_insn_uses ())
637 if (use->insn () != insn && hazard (use->insn ())) // WaR
638 break;
639 }
640
641 if (!HARD_REGISTER_NUM_P (def->regno ()))
642 continue;
643
644 // Also need to check backwards for call clobbers (WaW).
645 for (auto call_group : def->ebb ()->call_clobbers ())
646 {
	if (!call_group->clobbers (def->resource ()))
648 continue;
649
	auto clobber_insn = prev_call_clobbers (*call_group, def->insn (),
						ignore_nothing ());
652 if (clobber_insn)
653 hazard (clobber_insn);
654 }
655
656 }
657
658 return result;
659}
660
661// Return the first dataflow hazard after INSN.
662//
663// If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
664// dataflow purposes. This is needed when considering changing the RTL base of
665// an access discovered through a MEM_EXPR base.
666//
667// N.B. we ignore any defs/uses of memory here as we deal with that separately,
668// making use of alias disambiguation.
669static insn_info *
670first_hazard_after (insn_info *insn, rtx *ignore)
671{
672 insn_info *result = nullptr;
673 auto hazard = [insn, &result](insn_info *h)
674 {
675 gcc_checking_assert (*h > *insn);
676 if (!result || *h < *result)
677 result = h;
678 };
679
  rtx pat = PATTERN (insn->rtl ());
681 auto ignore_use = [&](use_info *u)
682 {
683 if (u->is_mem ())
684 return true;
685
686 return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
687 };
688
689 for (auto def : insn->defs ())
690 {
691 if (def->is_mem ())
692 continue;
693
694 if (def->next_def ())
695 hazard (def->next_def ()->insn ()); // WaW
696
      auto set = dyn_cast<set_info *> (def);
698 if (set && set->has_nondebug_insn_uses ())
699 hazard (set->first_nondebug_insn_use ()->insn ()); // RaW
700
701 if (!HARD_REGISTER_NUM_P (def->regno ()))
702 continue;
703
704 // Also check for call clobbers of this def (WaW).
705 for (auto call_group : def->ebb ()->call_clobbers ())
706 {
	  if (!call_group->clobbers (def->resource ()))
708 continue;
709
	  auto clobber_insn = next_call_clobbers (*call_group, def->insn (),
						  ignore_nothing ());
712 if (clobber_insn)
713 hazard (clobber_insn);
714 }
715 }
716
717 // Find any subsequent defs of uses in INSN (WaR).
718 for (auto use : insn->uses ())
719 {
720 if (ignore_use (use))
721 continue;
722
723 if (use->def ())
724 {
725 auto def = use->def ()->next_def ();
726 if (def && def->insn () == insn)
727 def = def->next_def ();
728
729 if (def)
730 hazard (def->insn ());
731 }
732
733 if (!HARD_REGISTER_NUM_P (use->regno ()))
734 continue;
735
736 // Also need to handle call clobbers of our uses (again WaR).
737 //
738 // See restrict_movement_for_uses for why we don't need to check
739 // backwards for call clobbers.
740 for (auto call_group : use->ebb ()->call_clobbers ())
741 {
	  if (!call_group->clobbers (use->resource ()))
743 continue;
744
	  auto clobber_insn = next_call_clobbers (*call_group, use->insn (),
						  ignore_nothing ());
747 if (clobber_insn)
748 hazard (clobber_insn);
749 }
750 }
751
752 return result;
753}
754
755// Return true iff R1 and R2 overlap.
756static bool
757ranges_overlap_p (const insn_range_info &r1, const insn_range_info &r2)
758{
759 // If either range is empty, then their intersection is empty.
760 if (!r1 || !r2)
761 return false;
762
763 // When do they not overlap? When one range finishes before the other
764 // starts, i.e. (*r1.last < *r2.first || *r2.last < *r1.first).
765 // Inverting this, we get the below.
766 return *r1.last >= *r2.first && *r2.last >= *r1.first;
767}
768
769// Get the range of insns that def feeds.
770static insn_range_info get_def_range (def_info *def)
771{
772 insn_info *last = def->next_def ()->insn ()->prev_nondebug_insn ();
773 return { def->insn (), last };
774}
775
776// Given a def (of memory), return the downwards range within which we
777// can safely move this def.
778static insn_range_info
779def_downwards_move_range (def_info *def)
780{
781 auto range = get_def_range (def);
782
  auto set = dyn_cast<set_info *> (def);
784 if (!set || !set->has_any_uses ())
785 return range;
786
787 auto use = set->first_nondebug_insn_use ();
788 if (use)
    range = move_earlier_than (range, use->insn ());
790
791 return range;
792}
793
794// Given a def (of memory), return the upwards range within which we can
795// safely move this def.
796static insn_range_info
797def_upwards_move_range (def_info *def)
798{
799 def_info *prev = def->prev_def ();
800 insn_range_info range { prev->insn (), def->insn () };
801
  auto set = dyn_cast<set_info *> (prev);
803 if (!set || !set->has_any_uses ())
804 return range;
805
806 auto use = set->last_nondebug_insn_use ();
807 if (use)
    range = move_later_than (range, use->insn ());
809
810 return range;
811}
812
813// Class that implements a state machine for building the changes needed to form
814// a store pair instruction. This allows us to easily build the changes in
815// program order, as required by rtl-ssa.
816struct store_change_builder
817{
818 enum class state
819 {
820 FIRST,
821 INSERT,
822 FIXUP_USE,
823 LAST,
824 DONE
825 };
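  // When re-purposing one of the original stores, the sequence of states is
  // FIRST -> LAST -> DONE; when inserting a brand new stp insn it is
  // FIRST -> INSERT -> FIXUP_USE* -> LAST -> DONE (see advance below).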
826
827 enum class action
828 {
829 TOMBSTONE,
830 CHANGE,
831 INSERT,
832 FIXUP_USE
833 };
834
835 struct change
836 {
837 action type;
838 insn_info *insn;
839 };
840
841 bool done () const { return m_state == state::DONE; }
842
843 store_change_builder (insn_info *insns[2],
844 insn_info *repurpose,
845 insn_info *dest)
846 : m_state (state::FIRST), m_insns { insns[0], insns[1] },
847 m_repurpose (repurpose), m_dest (dest), m_use (nullptr) {}
848
  change get_change () const
  {
    switch (m_state)
      {
      case state::FIRST:
	return {
	  m_insns[0] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[0]
	};
      case state::LAST:
	return {
	  m_insns[1] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[1]
	};
      case state::INSERT:
	return { action::INSERT, m_dest };
      case state::FIXUP_USE:
	return { action::FIXUP_USE, m_use->insn () };
      case state::DONE:
	break;
      }

    gcc_unreachable ();
  }
873
874 // Transition to the next state.
875 void advance ()
876 {
877 switch (m_state)
878 {
879 case state::FIRST:
880 if (m_repurpose)
881 m_state = state::LAST;
882 else
883 m_state = state::INSERT;
884 break;
885 case state::INSERT:
886 {
	  def_info *def = memory_access (m_insns[0]->defs ());
888 while (*def->next_def ()->insn () <= *m_dest)
889 def = def->next_def ();
890
891 // Now we know DEF feeds the insertion point for the new stp.
892 // Look for any uses of DEF that will consume the new stp.
893 gcc_assert (*def->insn () <= *m_dest
894 && *def->next_def ()->insn () > *m_dest);
895
	  auto set = as_a<set_info *> (def);
897 for (auto use : set->nondebug_insn_uses ())
898 if (*use->insn () > *m_dest)
899 {
900 m_use = use;
901 break;
902 }
903
904 if (m_use)
905 m_state = state::FIXUP_USE;
906 else
907 m_state = state::LAST;
908 break;
909 }
910 case state::FIXUP_USE:
911 m_use = m_use->next_nondebug_insn_use ();
912 if (!m_use)
913 m_state = state::LAST;
914 break;
915 case state::LAST:
916 m_state = state::DONE;
917 break;
918 case state::DONE:
919 gcc_unreachable ();
920 }
921 }
922
923private:
924 state m_state;
925
926 // Original candidate stores.
927 insn_info *m_insns[2];
928
929 // If non-null, this is a candidate insn to change into an stp. Otherwise we
930 // are deleting both original insns and inserting a new insn for the stp.
931 insn_info *m_repurpose;
932
  // Destination of the stp; it will be placed immediately after m_dest.
934 insn_info *m_dest;
935
936 // Current nondebug use that needs updating due to stp insertion.
937 use_info *m_use;
938};
939
940// Given candidate store insns FIRST and SECOND, see if we can re-purpose one
941// of them (together with its def of memory) for the stp insn. If so, return
942// that insn. Otherwise, return null.
943static insn_info *
944try_repurpose_store (insn_info *first,
945 insn_info *second,
946 const insn_range_info &move_range)
947{
  def_info * const defs[2] = {
    memory_access (first->defs ()),
    memory_access (second->defs ())
  };

  if (move_range.includes (first)
      || ranges_overlap_p (move_range, def_downwards_move_range (defs[0])))
    return first;

  if (move_range.includes (second)
      || ranges_overlap_p (move_range, def_upwards_move_range (defs[1])))
    return second;
960
961 return nullptr;
962}
963
964// Generate the RTL pattern for a "tombstone"; used temporarily during this pass
965// to replace stores that are marked for deletion where we can't immediately
966// delete the store (since there are uses of mem hanging off the store).
967//
968// These are deleted at the end of the pass and uses re-parented appropriately
969// at this point.
970static rtx
971gen_tombstone (void)
972{
973 return gen_rtx_CLOBBER (VOIDmode,
974 gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)));
975}
976
977// Go through the reg notes rooted at NOTE, dropping those that we should drop,
978// and preserving those that we want to keep by prepending them to (and
979// returning) RESULT. EH_REGION is used to make sure we have at most one
980// REG_EH_REGION note in the resulting list. FR_EXPR is used to return any
981// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in
982// combine_reg_notes.
983static rtx
984filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr)
985{
986 for (; note; note = XEXP (note, 1))
987 {
988 switch (REG_NOTE_KIND (note))
989 {
990 case REG_DEAD:
991 // REG_DEAD notes aren't required to be maintained.
992 case REG_EQUAL:
993 case REG_EQUIV:
994 case REG_UNUSED:
995 case REG_NOALIAS:
996 // These can all be dropped. For REG_EQU{AL,IV} they cannot apply to
	// non-single_set insns, and REG_UNUSED is re-computed by RTL-SSA, see
998 // rtl-ssa/changes.cc:update_notes.
999 //
1000 // Similarly, REG_NOALIAS cannot apply to a parallel.
1001 case REG_INC:
1002 // When we form the pair insn, the reg update is implemented
1003 // as just another SET in the parallel, so isn't really an
1004 // auto-increment in the RTL sense, hence we drop the note.
1005 break;
1006 case REG_EH_REGION:
1007 gcc_assert (!*eh_region);
1008 *eh_region = true;
1009 result = alloc_reg_note (REG_EH_REGION, XEXP (note, 0), result);
1010 break;
1011 case REG_CFA_DEF_CFA:
1012 case REG_CFA_OFFSET:
1013 case REG_CFA_RESTORE:
1014 result = alloc_reg_note (REG_NOTE_KIND (note),
1015 copy_rtx (XEXP (note, 0)),
1016 result);
1017 break;
1018 case REG_FRAME_RELATED_EXPR:
1019 gcc_assert (!*fr_expr);
1020 *fr_expr = copy_rtx (XEXP (note, 0));
1021 break;
1022 default:
1023 // Unexpected REG_NOTE kind.
1024 gcc_unreachable ();
1025 }
1026 }
1027
1028 return result;
1029}
1030
1031// Return the notes that should be attached to a combination of I1 and I2, where
1032// *I1 < *I2. LOAD_P is true for loads.
1033static rtx
1034combine_reg_notes (insn_info *i1, insn_info *i2, bool load_p)
1035{
1036 // Temporary storage for REG_FRAME_RELATED_EXPR notes.
1037 rtx fr_expr[2] = {};
1038
1039 bool found_eh_region = false;
1040 rtx result = NULL_RTX;
  result = filter_notes (REG_NOTES (i2->rtl ()), result,
			 &found_eh_region, fr_expr + 1);
  result = filter_notes (REG_NOTES (i1->rtl ()), result,
			 &found_eh_region, fr_expr);
1045
1046 if (!load_p)
1047 {
1048 // Simple frame-related sp-relative saves don't need CFI notes, but when
1049 // we combine them into an stp we will need a CFI note as dwarf2cfi can't
1050 // interpret the unspec pair representation directly.
      if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0])
	fr_expr[0] = copy_rtx (PATTERN (i1->rtl ()));
      if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1])
	fr_expr[1] = copy_rtx (PATTERN (i2->rtl ()));
1055 }
1056
1057 rtx fr_pat = NULL_RTX;
1058 if (fr_expr[0] && fr_expr[1])
1059 {
1060 // Combining two frame-related insns, need to construct
1061 // a REG_FRAME_RELATED_EXPR note which represents the combined
1062 // operation.
1063 RTX_FRAME_RELATED_P (fr_expr[1]) = 1;
1064 fr_pat = gen_rtx_PARALLEL (VOIDmode,
1065 gen_rtvec (2, fr_expr[0], fr_expr[1]));
1066 }
1067 else
1068 fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1];
1069
1070 if (fr_pat)
1071 result = alloc_reg_note (REG_FRAME_RELATED_EXPR,
1072 fr_pat, result);
1073
1074 return result;
1075}
1076
1077// Given two memory accesses in PATS, at least one of which is of a
1078// writeback form, extract two non-writeback memory accesses addressed
1079// relative to the initial value of the base register, and output these
1080// in PATS. Return an rtx that represents the overall change to the
1081// base register.
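//
// For example (using aarch64 syntax), given the pair
//   ldr x2, [x1, #8]!
//   ldr x3, [x1, #8]
// PATS would be rewritten to load from [x1, #8] and [x1, #16] respectively,
// and the returned rtx would express x1 = x1 + 8.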
1082static rtx
1083extract_writebacks (bool load_p, rtx pats[2], int changed)
1084{
1085 rtx base_reg = NULL_RTX;
1086 poly_int64 current_offset = 0;
1087
1088 poly_int64 offsets[2];
1089
1090 for (int i = 0; i < 2; i++)
1091 {
1092 rtx mem = XEXP (pats[i], load_p);
1093 rtx reg = XEXP (pats[i], !load_p);
1094
1095 rtx addr = XEXP (mem, 0);
1096 const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;
1097
1098 poly_int64 offset;
      rtx this_base = pair_mem_strip_offset (mem, &offset);
1100 gcc_assert (REG_P (this_base));
1101 if (base_reg)
1102 gcc_assert (rtx_equal_p (base_reg, this_base));
1103 else
1104 base_reg = this_base;
1105
1106 // If we changed base for the current insn, then we already
1107 // derived the correct mem for this insn from the effective
1108 // address of the other access.
1109 if (i == changed)
1110 {
1111 gcc_checking_assert (!autoinc_p);
1112 offsets[i] = offset;
1113 continue;
1114 }
1115
      if (autoinc_p && any_pre_modify_p (addr))
1117 current_offset += offset;
1118
1119 poly_int64 this_off = current_offset;
1120 if (!autoinc_p)
1121 this_off += offset;
1122
1123 offsets[i] = this_off;
1124 rtx new_mem = change_address (mem, GET_MODE (mem),
1125 plus_constant (GET_MODE (base_reg),
1126 base_reg, this_off));
1127 pats[i] = load_p
1128 ? gen_rtx_SET (reg, new_mem)
1129 : gen_rtx_SET (new_mem, reg);
1130
      if (autoinc_p && any_post_modify_p (addr))
1132 current_offset += offset;
1133 }
1134
1135 if (known_eq (current_offset, 0))
1136 return NULL_RTX;
1137
1138 return gen_rtx_SET (base_reg, plus_constant (GET_MODE (base_reg),
1139 base_reg, current_offset));
1140}
1141
1142// INSNS contains either {nullptr, pair insn} (when promoting an existing
1143// non-writeback pair) or contains the candidate insns used to form the pair
1144// (when fusing a new pair).
1145//
1146// PAIR_RANGE specifies where we want to form the final pair.
1147// INITIAL_OFFSET gives the current base offset for the pair.
1148// Bit I of INITIAL_WRITEBACK is set if INSNS[I] initially had writeback.
1149// ACCESS_SIZE gives the access size for a single arm of the pair.
1150// BASE_DEF gives the initial def of the base register consumed by the pair.
1151//
1152// Given the above, this function looks for a trailing destructive update of the
1153// base register. If there is one, we choose the first such update after
1154// PAIR_DST that is still in the same BB as our pair. We return the new def in
1155// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
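//
// For example, on aarch64 it is this function that allows
//   stp x0, x1, [x2]
//   add x2, x2, 16
// to be folded into a single post-index "stp x0, x1, [x2], 16", provided no
// intervening insn between the pair's destination and the add cares about x2.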
1156insn_info *
1157pair_fusion::find_trailing_add (insn_info *insns[2],
1158 const insn_range_info &pair_range,
1159 int initial_writeback,
1160 rtx *writeback_effect,
1161 def_info **add_def,
1162 def_info *base_def,
1163 poly_int64 initial_offset,
1164 unsigned access_size)
1165{
1166 // Punt on frame-related insns, it is better to be conservative and
1167 // not try to form writeback pairs here, and means we don't have to
1168 // worry about the writeback case in forming REG_FRAME_RELATED_EXPR
1169 // notes (see combine_reg_notes).
1170 if ((insns[0] && RTX_FRAME_RELATED_P (insns[0]->rtl ()))
1171 || RTX_FRAME_RELATED_P (insns[1]->rtl ()))
1172 return nullptr;
1173
1174 insn_info *pair_dst = pair_range.singleton ();
1175 gcc_assert (pair_dst);
1176
1177 def_info *def = base_def->next_def ();
1178
1179 // In the case that either of the initial pair insns had writeback,
1180 // then there will be intervening defs of the base register.
1181 // Skip over these.
1182 for (int i = 0; i < 2; i++)
1183 if (initial_writeback & (1 << i))
1184 {
1185 gcc_assert (def->insn () == insns[i]);
1186 def = def->next_def ();
1187 }
1188
1189 if (!def || def->bb () != pair_dst->bb ())
1190 return nullptr;
1191
1192 // DEF should now be the first def of the base register after PAIR_DST.
1193 insn_info *cand = def->insn ();
1194 gcc_assert (*cand > *pair_dst);
1195
1196 const auto base_regno = base_def->regno ();
1197
1198 // If CAND doesn't also use our base register,
1199 // it can't destructively update it.
  if (!find_access (cand->uses (), base_regno))
1201 return nullptr;
1202
1203 auto rti = cand->rtl ();
1204
1205 if (!INSN_P (rti))
1206 return nullptr;
1207
  auto pat = PATTERN (rti);
1209 if (GET_CODE (pat) != SET)
1210 return nullptr;
1211
1212 auto dest = XEXP (pat, 0);
1213 if (!REG_P (dest) || REGNO (dest) != base_regno)
1214 return nullptr;
1215
1216 poly_int64 offset;
1217 rtx rhs_base = strip_offset (XEXP (pat, 1), &offset);
1218 if (!REG_P (rhs_base)
1219 || REGNO (rhs_base) != base_regno
1220 || !offset.is_constant ())
1221 return nullptr;
1222
1223 // If the initial base offset is zero, we can handle any add offset
1224 // (post-inc). Otherwise, we require the offsets to match (pre-inc).
1225 if (!known_eq (initial_offset, 0) && !known_eq (offset, initial_offset))
1226 return nullptr;
1227
1228 auto off_hwi = offset.to_constant ();
1229
1230 if (off_hwi % access_size != 0)
1231 return nullptr;
1232
1233 off_hwi /= access_size;
1234
  if (!pair_mem_in_range_p (off_hwi))
1236 return nullptr;
1237
  auto dump_prefix = [&]()
    {
      if (!insns[0])
	fprintf (dump_file, "existing pair i%d: ", insns[1]->uid ());
      else
	fprintf (dump_file, " (%d,%d)",
		 insns[0]->uid (), insns[1]->uid ());
    };
1246
  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1], false);
  if (!hazard || *hazard <= *pair_dst)
    {
      if (dump_file)
	{
	  dump_prefix ();
	  fprintf (dump_file,
		   "folding in trailing add (%d) to use writeback form\n",
		   cand->uid ());
	}
1257
1258 *add_def = def;
1259 *writeback_effect = copy_rtx (pat);
1260 return cand;
1261 }
1262
  if (dump_file)
    {
      dump_prefix ();
      fprintf (dump_file,
	       "can't fold in trailing add (%d), hazard = %d\n",
	       cand->uid (), hazard->uid ());
    }
1270
1271 return nullptr;
1272}
1273
1274// We just emitted a tombstone with uid UID, track it in a bitmap for
1275// this BB so we can easily identify it later when cleaning up tombstones.
1276void
1277pair_fusion_bb_info::track_tombstone (int uid)
1278{
1279 if (!m_emitted_tombstone)
1280 {
1281 // Lazily initialize the bitmap for tracking tombstone insns.
1282 bitmap_obstack_initialize (&m_bitmap_obstack);
      bitmap_initialize (&m_tombstone_bitmap, &m_bitmap_obstack);
1284 m_emitted_tombstone = true;
1285 }
1286
1287 if (!bitmap_set_bit (&m_tombstone_bitmap, uid))
1288 gcc_unreachable (); // Bit should have changed.
1289}
1290
1291// Reset the debug insn containing USE (the debug insn has been
1292// optimized away).
1293static void
1294reset_debug_use (use_info *use)
1295{
1296 auto use_insn = use->insn ();
1297 auto use_rtl = use_insn->rtl ();
1298 insn_change change (use_insn);
1299 change.new_uses = {};
1300 INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
1301 crtl->ssa->change_insn (change);
1302}
1303
1304// USE is a debug use that needs updating because DEF (a def of the same
1305// register) is being re-ordered over it. If BASE is non-null, then DEF
1306// is an update of the register BASE by a constant, given by WB_OFFSET,
1307// and we can preserve debug info by accounting for the change in side
1308// effects.
1309static void
1310fixup_debug_use (obstack_watermark &attempt,
1311 use_info *use,
1312 def_info *def,
1313 rtx base,
1314 poly_int64 wb_offset)
1315{
1316 auto use_insn = use->insn ();
1317 if (base)
1318 {
1319 auto use_rtl = use_insn->rtl ();
1320 insn_change change (use_insn);
1321
1322 gcc_checking_assert (REG_P (base) && use->regno () == REGNO (base));
      change.new_uses = check_remove_regno_access (attempt,
						   change.new_uses,
						   use->regno ());
1326
1327 // The effect of the writeback is to add WB_OFFSET to BASE. If
1328 // we're re-ordering DEF below USE, then we update USE by adding
1329 // WB_OFFSET to it. Otherwise, if we're re-ordering DEF above
1330 // USE, we update USE by undoing the effect of the writeback
1331 // (subtracting WB_OFFSET).
1332 use_info *new_use;
1333 if (*def->insn () > *use_insn)
1334 {
1335 // We now need USE_INSN to consume DEF. Create a new use of DEF.
1336 //
1337 // N.B. this means until we call change_insns for the main change
1338 // group we will temporarily have a debug use consuming a def that
1339 // comes after it, but RTL-SSA doesn't currently support updating
1340 // debug insns as part of the main change group (together with
1341 // nondebug changes), so we will have to live with this update
1342 // leaving the IR being temporarily inconsistent. It seems to
1343 // work out OK once the main change group is applied.
	  wb_offset *= -1;
	  new_use = crtl->ssa->create_use (attempt,
					   use_insn,
					   as_a<set_info *> (def));
	}
      else
	new_use = find_access (def->insn ()->uses (), use->regno ());
1351
      change.new_uses = insert_access (attempt, new_use, change.new_uses);
1353
      if (dump_file)
	{
	  const char *dir = (*def->insn () < *use_insn) ? "down" : "up";
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "]");
	  pp_string (&pp, " due to wb def ");
	  pp_string (&pp, "[");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file,
		   " i%d: fix up debug use %s re-ordered %s, "
		   "sub r%u -> r%u + ",
		   use_insn->uid (), pp_formatted_text (&pp),
		   dir, REGNO (base), REGNO (base));
	  print_dec (wb_offset, dump_file);
	  fprintf (dump_file, "\n");
	}
1373
1374 insn_propagation prop (use_rtl, base,
1375 plus_constant (GET_MODE (base), base, wb_offset));
1376 if (prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl)))
1377 crtl->ssa->change_insn (change);
1378 else
1379 {
	  if (dump_file)
	    fprintf (dump_file, " i%d: RTL substitution failed (%s)"
		     ", resetting debug insn", use_insn->uid (),
		     prop.failure_reason);
	  reset_debug_use (use);
1385 }
1386 }
1387 else
1388 {
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "] due to re-ordered load def [");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file, " i%d: resetting debug use %s\n",
		   use_insn->uid (), pp_formatted_text (&pp));
	}
1400 reset_debug_use (use);
1401 }
1402}
1403
1404// Update debug uses when folding in a trailing add insn to form a
1405// writeback pair.
1406//
1407// ATTEMPT is used to allocate RTL-SSA temporaries for the changes,
1408// the final pair is placed immediately after PAIR_DST, TRAILING_ADD
1409// is a trailing add insn which is being folded into the pair to make it
1410// use writeback addressing, and WRITEBACK_EFFECT is the pattern for
1411// TRAILING_ADD.
1412static void
1413fixup_debug_uses_trailing_add (obstack_watermark &attempt,
1414 insn_info *pair_dst,
1415 insn_info *trailing_add,
1416 rtx writeback_effect)
1417{
1418 rtx base = SET_DEST (writeback_effect);
1419
1420 poly_int64 wb_offset;
1421 rtx base2 = strip_offset (SET_SRC (writeback_effect), &wb_offset);
1422 gcc_checking_assert (rtx_equal_p (base, base2));
1423
1424 auto defs = trailing_add->defs ();
1425 gcc_checking_assert (defs.size () == 1);
1426 def_info *def = defs[0];
1427
  if (auto set = safe_dyn_cast<set_info *> (def->prev_def ()))
    for (auto use : iterate_safely (set->debug_insn_uses ()))
      if (*use->insn () > *pair_dst)
	// DEF is getting re-ordered above USE, fix up USE accordingly.
	fixup_debug_use (attempt, use, def, base, wb_offset);
1433}
1434
1435// Called from fuse_pair, fixes up any debug uses that will be affected
1436// by the changes.
1437//
1438// ATTEMPT is the obstack watermark used to allocate RTL-SSA temporaries for
1439// the changes, INSNS gives the candidate insns: at this point the use/def
1440// information should still be as on entry to fuse_pair, but the patterns may
1441// have changed, hence we pass ORIG_RTL which contains the original patterns
1442// for the candidate insns.
1443//
1444// The final pair will be placed immediately after PAIR_DST, LOAD_P is true if
1445// it is a load pair, bit I of WRITEBACK is set if INSNS[I] originally had
1446// writeback, and WRITEBACK_EFFECT is an rtx describing the overall update to
1447// the base register in the final pair (if any). BASE_REGNO gives the register
1448// number of the base register used in the final pair.
1449static void
1450fixup_debug_uses (obstack_watermark &attempt,
1451 insn_info *insns[2],
1452 rtx orig_rtl[2],
1453 insn_info *pair_dst,
1454 insn_info *trailing_add,
1455 bool load_p,
1456 int writeback,
1457 rtx writeback_effect,
1458 unsigned base_regno)
1459{
1460 // USE is a debug use that needs updating because DEF (a def of the
1461 // resource) is being re-ordered over it. If WRITEBACK_PAT is non-NULL,
1462 // then it gives the original RTL pattern for DEF's insn, and DEF is a
1463 // writeback update of the base register.
1464 //
1465 // This simply unpacks WRITEBACK_PAT if needed and calls fixup_debug_use.
1466 auto update_debug_use = [&](use_info *use, def_info *def,
1467 rtx writeback_pat)
1468 {
1469 poly_int64 offset = 0;
1470 rtx base = NULL_RTX;
1471 if (writeback_pat)
1472 {
1473 rtx mem = XEXP (writeback_pat, load_p);
1474 gcc_checking_assert (GET_RTX_CLASS (GET_CODE (XEXP (mem, 0)))
1475 == RTX_AUTOINC);
1476
	base = pair_mem_strip_offset (mem, &offset);
	gcc_checking_assert (REG_P (base) && REGNO (base) == base_regno);
      }
    fixup_debug_use (attempt, use, def, base, offset);
1481 };
1482
1483 // Reset any debug uses of mem over which we re-ordered a store.
1484 //
1485 // It would be nice to try and preserve debug info here, but it seems that
1486 // would require doing alias analysis to see if the store aliases with the
1487 // debug use, which seems a little extravagant just to preserve debug info.
1488 if (!load_p)
1489 {
      auto def = memory_access (insns[0]->defs ());
      auto last_def = memory_access (insns[1]->defs ());
      for (; def != last_def; def = def->next_def ())
	{
	  auto set = as_a<set_info *> (def);
	  for (auto use : iterate_safely (set->debug_insn_uses ()))
	    {
	      if (dump_file)
		fprintf (dump_file, " i%d: resetting debug use of mem\n",
			 use->insn ()->uid ());
	      reset_debug_use (use);
1501 }
1502 }
1503 }
1504
1505 // Now let's take care of register uses, starting with debug uses
1506 // attached to defs from our first insn.
1507 for (auto def : insns[0]->defs ())
1508 {
      auto set = dyn_cast<set_info *> (def);
1510 if (!set || set->is_mem () || !set->first_debug_insn_use ())
1511 continue;
1512
1513 def_info *defs[2] = {
1514 def,
	find_access (insns[1]->defs (), def->regno ())
1516 };
1517
1518 rtx writeback_pats[2] = {};
1519 if (def->regno () == base_regno)
1520 for (int i = 0; i < 2; i++)
1521 if (writeback & (1 << i))
1522 {
1523 gcc_checking_assert (defs[i]);
1524 writeback_pats[i] = orig_rtl[i];
1525 }
1526
1527 // Now that we've characterized the defs involved, go through the
1528 // debug uses and determine how to update them (if needed).
      for (auto use : iterate_safely (set->debug_insn_uses ()))
1530 {
1531 if (*pair_dst < *use->insn () && defs[1])
1532 // We're re-ordering defs[1] above a previous use of the
1533 // same resource.
1534 update_debug_use (use, defs[1], writeback_pats[1]);
1535 else if (*pair_dst >= *use->insn ())
1536 // We're re-ordering defs[0] below its use.
1537 update_debug_use (use, defs[0], writeback_pats[0]);
1538 }
1539 }
1540
1541 // Now let's look at registers which are def'd by the second insn
1542 // but not by the first insn, there may still be debug uses of a
1543 // previous def which can be affected by moving the second insn up.
1544 for (auto def : insns[1]->defs ())
1545 {
1546 // This should be M log N where N is the number of defs in
1547 // insns[0] and M is the number of defs in insns[1].
      if (def->is_mem () || find_access (insns[0]->defs (), def->regno ()))
1549 continue;
1550
      auto prev_set = safe_dyn_cast<set_info *> (def->prev_def ());
1552 if (!prev_set)
1553 continue;
1554
1555 rtx writeback_pat = NULL_RTX;
1556 if (def->regno () == base_regno && (writeback & 2))
1557 writeback_pat = orig_rtl[1];
1558
1559 // We have a def in insns[1] which isn't def'd by the first insn.
1560 // Look to the previous def and see if it has any debug uses.
      for (auto use : iterate_safely (prev_set->debug_insn_uses ()))
1562 if (*pair_dst < *use->insn ())
1563 // We're ordering DEF above a previous use of the same register.
1564 update_debug_use (use, def, writeback_pat);
1565 }
1566
1567 if ((writeback & 2) && !writeback_effect)
1568 {
1569 // If the second insn initially had writeback but the final
1570 // pair does not, then there may be trailing debug uses of the
1571 // second writeback def which need re-parenting: do that.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = as_a<set_info *> (def);
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  insn_change change (use->insn ());
	  change.new_uses = check_remove_regno_access (attempt,
						       change.new_uses,
						       base_regno);
	  auto new_use = find_access (insns[0]->uses (), base_regno);
1582
1583 // N.B. insns must have already shared a common base due to writeback.
1584 gcc_assert (new_use);
1585
	  if (dump_file)
	    fprintf (dump_file,
		     " i%d: cancelling wb, re-parenting trailing debug use\n",
		     use->insn ()->uid ());

	  change.new_uses = insert_access (attempt, new_use, change.new_uses);
1592 crtl->ssa->change_insn (change);
1593 }
1594 }
1595 else if (trailing_add)
1596 fixup_debug_uses_trailing_add (attempt, pair_dst, trailing_add,
1597 writeback_effect);
1598}
1599
1600// Try and actually fuse the pair given by insns I1 and I2.
1601//
1602// Here we've done enough analysis to know this is safe, we only
1603// reject the pair at this stage if either the tuning policy says to,
1604// or recog fails on the final pair insn.
1605//
1606// LOAD_P is true for loads, ACCESS_SIZE gives the access size of each
1607// candidate insn. Bit i of WRITEBACK is set if the ith insn (in program
1608// order) uses writeback.
1609//
1610// BASE gives the chosen base candidate for the pair and MOVE_RANGE is
1611// a singleton range which says where to place the pair.
1612bool
1613pair_fusion_bb_info::fuse_pair (bool load_p,
1614 unsigned access_size,
1615 int writeback,
1616 insn_info *i1, insn_info *i2,
1617 base_cand &base,
1618 const insn_range_info &move_range)
1619{
1620 auto attempt = crtl->ssa->new_change_attempt ();
1621
  auto make_change = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt, insn);
    };
  auto make_delete = [&attempt](insn_info *insn)
    {
      return crtl->ssa->change_alloc<insn_change> (attempt,
						   insn,
						   insn_change::DELETE);
    };
1632
1633 insn_info *first = (*i1 < *i2) ? i1 : i2;
1634 insn_info *second = (first == i1) ? i2 : i1;
1635
1636 insn_info *pair_dst = move_range.singleton ();
1637 gcc_assert (pair_dst);
1638
1639 insn_info *insns[2] = { first, second };
1640
1641 auto_vec<insn_change *> changes;
1642 auto_vec<int, 3> tombstone_uids;
1643
  rtx pats[2] = {
    PATTERN (first->rtl ()),
    PATTERN (second->rtl ())
  };
1648
1649 // Make copies of the patterns as we might need to refer to the original RTL
1650 // later, for example when updating debug uses (which is after we've updated
1651 // one or both of the patterns in the candidate insns).
1652 rtx orig_rtl[2];
1653 for (int i = 0; i < 2; i++)
1654 orig_rtl[i] = copy_rtx (pats[i]);
1655
1656 use_array input_uses[2] = { first->uses (), second->uses () };
1657 def_array input_defs[2] = { first->defs (), second->defs () };
1658
1659 int changed_insn = -1;
1660 if (base.from_insn != -1)
1661 {
1662 // If we're not already using a shared base, we need
1663 // to re-write one of the accesses to use the base from
1664 // the other insn.
1665 gcc_checking_assert (base.from_insn == 0 || base.from_insn == 1);
1666 changed_insn = !base.from_insn;
1667
1668 rtx base_pat = pats[base.from_insn];
1669 rtx change_pat = pats[changed_insn];
1670 rtx base_mem = XEXP (base_pat, load_p);
1671 rtx change_mem = XEXP (change_pat, load_p);
1672
1673 const bool lower_base_p = (insns[base.from_insn] == i1);
1674 HOST_WIDE_INT adjust_amt = access_size;
1675 if (!lower_base_p)
1676 adjust_amt *= -1;
1677
1678 rtx change_reg = XEXP (change_pat, !load_p);
      rtx effective_base = drop_writeback (base_mem);
1680 rtx adjusted_addr = plus_constant (Pmode,
1681 XEXP (effective_base, 0),
1682 adjust_amt);
1683 rtx new_mem = replace_equiv_address_nv (change_mem, adjusted_addr);
1684 rtx new_set = load_p
1685 ? gen_rtx_SET (change_reg, new_mem)
1686 : gen_rtx_SET (new_mem, change_reg);
1687
1688 pats[changed_insn] = new_set;
1689
1690 auto keep_use = [&](use_info *u)
1691 {
1692 return refers_to_regno_p (u->regno (), u->regno () + 1,
1693 change_pat, &XEXP (change_pat, load_p));
1694 };
1695
      // Drop any uses that only occur in the old address.
      input_uses[changed_insn] = filter_accesses (attempt,
						  input_uses[changed_insn],
						  keep_use);
1700 }
1701
  rtx writeback_effect = NULL_RTX;
  if (writeback)
    writeback_effect = extract_writebacks (load_p, pats, changed_insn);
1705
1706 const auto base_regno = base.def->regno ();
1707
1708 if (base.from_insn == -1 && (writeback & 1))
1709 {
1710 // If the first of the candidate insns had a writeback form, we'll need to
1711 // drop the use of the updated base register from the second insn's uses.
1712 //
1713 // N.B. we needn't worry about the base register occurring as a store
1714 // operand, as we checked that there was no non-address true dependence
1715 // between the insns in try_fuse_pair.
1716 gcc_checking_assert (find_access (input_uses[1], base_regno));
      input_uses[1] = check_remove_regno_access (attempt,
						 input_uses[1],
						 base_regno);
1720 }
1721
1722 // Go through and drop uses that only occur in register notes,
1723 // as we won't be preserving those.
1724 for (int i = 0; i < 2; i++)
1725 {
1726 auto rti = insns[i]->rtl ();
1727 if (!REG_NOTES (rti))
1728 continue;
1729
      input_uses[i] = remove_note_accesses (attempt, input_uses[i]);
1731 }
1732
1733 // Get the uses that the pair instruction would have, and punt if
1734 // the unpaired instructions use different definitions of the same
1735 // register. That would normally be caught as a side-effect of
1736 // hazard detection below, but this check also deals with cases
1737 // in which one use is undefined and the other isn't.
  auto new_uses = merge_access_arrays (attempt,
				       drop_memory_access (input_uses[0]),
				       drop_memory_access (input_uses[1]));
1741 if (!new_uses.is_valid ())
1742 {
1743 if (dump_file)
1744 fprintf (stream: dump_file,
1745 format: " rejecting pair: i%d and i%d use different definitions of"
1746 " the same register\n",
1747 insns[0]->uid (), insns[1]->uid ());
1748 return false;
1749 }
1750
1751 // Edge case: if the first insn is a writeback load and the
1752 // second insn is a non-writeback load which transfers into the base
1753 // register, then we should drop the writeback altogether as the
1754 // update of the base register from the second load should prevail.
1755 //
1756 // For example:
1757 // ldr x2, [x1], #8
1758 // ldr x1, [x1]
1759 // -->
1760 // ldp x2, x1, [x1]
1761 if (writeback == 1
1762 && load_p
1763 && find_access (accesses: input_defs[1], regno: base_regno))
1764 {
1765 if (dump_file)
1766 fprintf (stream: dump_file,
1767 format: " load pair: i%d has wb but subsequent i%d has non-wb "
1768 "update of base (r%d), dropping wb\n",
1769 insns[0]->uid (), insns[1]->uid (), base_regno);
1770 gcc_assert (writeback_effect);
1771 writeback_effect = NULL_RTX;
1772 }
1773
1774 // So far the patterns have been in instruction order;
1775 // now we want them in offset order.
1776 if (i1 != first)
1777 std::swap (a&: pats[0], b&: pats[1]);
1778
1779 poly_int64 offsets[2];
1780 for (int i = 0; i < 2; i++)
1781 {
1782 rtx mem = XEXP (pats[i], load_p);
1783 gcc_checking_assert (MEM_P (mem));
1784 rtx base = strip_offset (XEXP (mem, 0), offsets + i);
1785 gcc_checking_assert (REG_P (base));
1786 gcc_checking_assert (base_regno == REGNO (base));
1787 }
1788
1789 // If either of the original insns had writeback, but the resulting pair insn
1790 // does not (can happen e.g. in the load pair edge case above, or if the
1791 // writeback effects cancel out), then drop the def(s) of the base register
1792 // as appropriate.
1793 //
1794 // Also drop the first def in the case that both of the original insns had
1795 // writeback. The second def could well have uses, but the first def should
1796 // only be used by the second insn (and we dropped that use above).
1797 for (int i = 0; i < 2; i++)
1798 if ((!writeback_effect && (writeback & (1 << i)))
1799 || (i == 0 && writeback == 3))
1800 input_defs[i] = check_remove_regno_access (watermark&: attempt,
1801 accesses: input_defs[i],
1802 regno: base_regno);
1803
1804 // If we don't currently have a writeback pair, and we don't have
1805 // a load that clobbers the base register, look for a trailing destructive
1806 // update of the base register and try and fold it in to make this into a
1807 // writeback pair.
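  //
  // A minimal illustration (assuming an aarch64-like target), folding a
  // trailing update of the base register into a post-index pair:
  //
  //   ldr  x2, [x1]
  //   ldr  x3, [x1, #8]
  //   add  x1, x1, #16
  //   -->
  //   ldp  x2, x3, [x1], #16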
1808 insn_info *trailing_add = nullptr;
1809 if (m_pass->should_handle_writeback (which: writeback_type::ALL)
1810 && !writeback_effect
1811 && (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
1812 XEXP (pats[0], 0), nullptr)
1813 && !refers_to_regno_p (base_regno, base_regno + 1,
1814 XEXP (pats[1], 0), nullptr))))
1815 {
1816 def_info *add_def;
1817 trailing_add = m_pass->find_trailing_add (insns, pair_range: move_range, initial_writeback: writeback,
1818 writeback_effect: &writeback_effect,
1819 add_def: &add_def, base_def: base.def, initial_offset: offsets[0],
1820 access_size);
1821 if (trailing_add)
1822 {
1823 // The def of the base register from the trailing add should prevail.
1824 input_defs[0] = insert_access (watermark&: attempt, access1: add_def, accesses2: input_defs[0]);
1825 gcc_assert (input_defs[0].is_valid ());
1826 }
1827 }
1828
1829 // Now that we know what base mem we're going to use, check if it's OK
1830 // with the pair mem policy.
1831 rtx first_mem = XEXP (pats[0], load_p);
1832 if (!m_pass->pair_mem_ok_with_policy (base_mem: first_mem, load_p))
1833 {
1834 if (dump_file)
1835 fprintf (stream: dump_file,
1836 format: "punting on pair (%d,%d), pair mem policy says no\n",
1837 i1->uid (), i2->uid ());
1838 return false;
1839 }
1840
1841 rtx reg_notes = combine_reg_notes (i1: first, i2: second, load_p);
1842
1843 rtx pair_pat = m_pass->gen_pair (pats, writeback: writeback_effect, load_p);
1844 insn_change *pair_change = nullptr;
1845 auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
1846 rtx_insn *rti = change->insn ()->rtl ();
1847 validate_unshare_change (rti, &PATTERN (insn: rti), pair_pat, true);
1848 validate_change (rti, &REG_NOTES (rti), reg_notes, true);
1849 };
1850
1851 // Turn CHANGE into a memory definition tombstone.
1852 auto make_tombstone = [&](insn_change *change)
1853 {
1854 tombstone_uids.quick_push (obj: change->insn ()->uid ());
1855 rtx_insn *rti = change->insn ()->rtl ();
1856 validate_change (rti, &PATTERN (insn: rti), gen_tombstone (), true);
1857 validate_change (rti, &REG_NOTES (rti), NULL_RTX, true);
1858 change->new_uses = use_array ();
1859 };
1860
1861 if (load_p)
1862 {
1863 changes.safe_push (obj: make_delete (first));
1864 pair_change = make_change (second);
1865 changes.safe_push (obj: pair_change);
1866
1867 pair_change->move_range = move_range;
1868 pair_change->new_defs = merge_access_arrays (watermark&: attempt,
1869 accesses1: input_defs[0],
1870 accesses2: input_defs[1]);
1871 gcc_assert (pair_change->new_defs.is_valid ());
1872
1873 pair_change->new_uses = new_uses;
1874 set_pair_pat (pair_change);
1875 }
1876 else
1877 {
1878 using Action = store_change_builder::action;
1879 insn_info *store_to_change = try_repurpose_store (first, second,
1880 move_range);
1881 store_change_builder builder (insns, store_to_change, pair_dst);
1882 insn_change *change;
1883 set_info *new_set = nullptr;
1884 for (; !builder.done (); builder.advance ())
1885 {
1886 auto action = builder.get_change ();
1887 change = (action.type == Action::INSERT)
1888 ? nullptr : make_change (action.insn);
1889 switch (action.type)
1890 {
1891 case Action::CHANGE:
1892 {
1893 set_pair_pat (change);
1894 change->new_uses = new_uses;
1895 auto d1 = drop_memory_access (accesses: input_defs[0]);
1896 auto d2 = drop_memory_access (accesses: input_defs[1]);
1897 change->new_defs = merge_access_arrays (watermark&: attempt, accesses1: d1, accesses2: d2);
1898 gcc_assert (change->new_defs.is_valid ());
1899 def_info *store_def = memory_access (accesses: change->insn ()->defs ());
1900 change->new_defs = insert_access (watermark&: attempt,
1901 access1: store_def,
1902 accesses2: change->new_defs);
1903 gcc_assert (change->new_defs.is_valid ());
1904 change->move_range = move_range;
1905 pair_change = change;
1906 break;
1907 }
1908 case Action::TOMBSTONE:
1909 {
1910 make_tombstone (change);
1911 break;
1912 }
1913 case Action::INSERT:
1914 {
1915 if (dump_file)
1916 fprintf (stream: dump_file,
1917 format: " stp: cannot re-purpose candidate stores\n");
1918
1919 auto new_insn = crtl->ssa->create_insn (watermark&: attempt, insn_code: INSN, pat: pair_pat);
1920 change = make_change (new_insn);
1921 change->move_range = move_range;
1922 change->new_uses = new_uses;
1923 gcc_assert (change->new_uses.is_valid ());
1924
1925 auto d1 = drop_memory_access (accesses: input_defs[0]);
1926 auto d2 = drop_memory_access (accesses: input_defs[1]);
1927 change->new_defs = merge_access_arrays (watermark&: attempt, accesses1: d1, accesses2: d2);
1928 gcc_assert (change->new_defs.is_valid ());
1929
1930 new_set = crtl->ssa->create_set (watermark&: attempt, insn: new_insn, resource: memory);
1931 change->new_defs = insert_access (watermark&: attempt, access1: new_set,
1932 accesses2: change->new_defs);
1933 gcc_assert (change->new_defs.is_valid ());
1934 pair_change = change;
1935 break;
1936 }
1937 case Action::FIXUP_USE:
1938 {
1939 // This use now needs to consume memory from our stp.
1940 if (dump_file)
1941 fprintf (stream: dump_file,
1942 format: " stp: changing i%d to use mem from new stp "
1943 "(after i%d)\n",
1944 action.insn->uid (), pair_dst->uid ());
1945 change->new_uses = drop_memory_access (accesses: change->new_uses);
1946 gcc_assert (new_set);
1947 auto new_use = crtl->ssa->create_use (watermark&: attempt, insn: action.insn,
1948 set: new_set);
1949 change->new_uses = insert_access (watermark&: attempt, access1: new_use,
1950 accesses2: change->new_uses);
1951 break;
1952 }
1953 }
1954 changes.safe_push (obj: change);
1955 }
1956 }
1957
1958 if (trailing_add)
1959 {
1960 if (auto *mem_def = memory_access (accesses: trailing_add->defs()))
1961 {
1962 auto *change = make_change (trailing_add);
1963 change->new_defs = insert_access (watermark&: attempt, access1: mem_def, accesses2: def_array ());
1964 make_tombstone (change);
1965 changes.safe_push (obj: change);
1966 }
1967 else
1968 changes.safe_push (obj: make_delete (trailing_add));
1969 }
1970 else if ((writeback & 2) && !writeback_effect)
1971 {
1972 // The second insn initially had writeback but now the pair does not,
1973 // need to update any nondebug uses of the base register def in the
1974 // second insn. We'll take care of debug uses later.
1975 auto def = find_access (accesses: insns[1]->defs (), regno: base_regno);
1976 gcc_assert (def);
1977 auto set = dyn_cast<set_info *> (p: def);
1978 if (set && set->has_nondebug_uses ())
1979 {
1980 auto orig_use = find_access (accesses: insns[0]->uses (), regno: base_regno);
1981 for (auto use : set->nondebug_insn_uses ())
1982 {
1983 auto change = make_change (use->insn ());
1984 change->new_uses = check_remove_regno_access (watermark&: attempt,
1985 accesses: change->new_uses,
1986 regno: base_regno);
1987 change->new_uses = insert_access (watermark&: attempt,
1988 access1: orig_use,
1989 accesses2: change->new_uses);
1990 changes.safe_push (obj: change);
1991 }
1992 }
1993 }
1994
1995 auto ignore = ignore_changing_insns (changes);
1996 for (unsigned i = 0; i < changes.length (); i++)
1997 gcc_assert (changes[i]->move_range.singleton ()
1998 && rtl_ssa::restrict_movement (*changes[i], ignore));
1999
2000 // Check the pair pattern is recog'd.
2001 if (!rtl_ssa::recog (watermark&: attempt, change&: *pair_change, ignore))
2002 {
2003 if (dump_file)
2004 fprintf (stream: dump_file, format: " failed to form pair, recog failed\n");
2005
2006 // Free any reg notes we allocated.
2007 while (reg_notes)
2008 {
2009 rtx next = XEXP (reg_notes, 1);
2010 free_EXPR_LIST_node (reg_notes);
2011 reg_notes = next;
2012 }
2013 cancel_changes (0);
2014 return false;
2015 }
2016
2017 gcc_assert (crtl->ssa->verify_insn_changes (changes));
2018
2019 // Fix up any debug uses that will be affected by the changes.
2020 if (MAY_HAVE_DEBUG_INSNS)
2021 fixup_debug_uses (attempt, insns, orig_rtl, pair_dst, trailing_add,
2022 load_p, writeback, writeback_effect, base_regno);
2023
2024 confirm_change_group ();
2025 crtl->ssa->change_insns (changes);
2026
2027 for (auto uid : tombstone_uids)
2028 track_tombstone (uid);
2029
2030 return true;
2031}
2032
2033// Return true if STORE_INSN may modify mem rtx MEM. Make sure we keep
2034// within our BUDGET for alias analysis.
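// BUDGET is decremented once per real alias query; once it reaches zero we
// conservatively assume a conflict rather than paying for further queries.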
2035static bool
2036store_modifies_mem_p (rtx mem, insn_info *store_insn, int &budget)
2037{
2038 if (!budget)
2039 {
2040 if (dump_file)
2041 {
2042 fprintf (stream: dump_file,
2043 format: "exceeded budget, assuming store %d aliases with mem ",
2044 store_insn->uid ());
2045 print_simple_rtl (dump_file, mem);
2046 fprintf (stream: dump_file, format: "\n");
2047 }
2048
2049 return true;
2050 }
2051
2052 budget--;
2053 return memory_modified_in_insn_p (mem, store_insn->rtl ());
2054}
2055
2056// Return true if LOAD may be modified by STORE. Make sure we keep
2057// within our BUDGET for alias analysis.
2058static bool
2059load_modified_by_store_p (insn_info *load,
2060 insn_info *store,
2061 int &budget)
2062{
2063 gcc_checking_assert (budget >= 0);
2064
2065 if (!budget)
2066 {
2067 if (dump_file)
2068 {
2069 fprintf (stream: dump_file,
2070 format: "exceeded budget, assuming load %d aliases with store %d\n",
2071 load->uid (), store->uid ());
2072 }
2073 return true;
2074 }
2075
2076 // It isn't safe to re-order stores over calls.
2077 if (CALL_P (load->rtl ()))
2078 return true;
2079
2080 budget--;
2081
2082 // Iterate over all MEMs in the load, seeing if any alias with
2083 // our store.
2084 subrtx_var_iterator::array_type array;
2085 rtx pat = PATTERN (insn: load->rtl ());
2086 FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
2087 if (MEM_P (*iter) && memory_modified_in_insn_p (*iter, store->rtl ()))
2088 return true;
2089
2090 return false;
2091}
2092
2093// Implement some common functionality used by both store_walker
2094// and load_walker.
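// The REVERSE template parameter selects the direction of the walk: with
// REVERSE false we walk forwards in program order, stopping at LIMIT, and
// with REVERSE true we walk backwards towards LIMIT.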
2095template<bool reverse>
2096class def_walker : public alias_walker
2097{
2098protected:
2099 using def_iter_t = typename std::conditional<reverse,
2100 reverse_def_iterator, def_iterator>::type;
2101
2102 static use_info *start_use_chain (def_iter_t &def_iter)
2103 {
2104 set_info *set = nullptr;
2105 for (; *def_iter; def_iter++)
2106 {
2107 set = dyn_cast<set_info *> (*def_iter);
2108 if (!set)
2109 continue;
2110
2111 use_info *use = reverse
2112 ? set->last_nondebug_insn_use ()
2113 : set->first_nondebug_insn_use ();
2114
2115 if (use)
2116 return use;
2117 }
2118
2119 return nullptr;
2120 }
2121
2122 def_iter_t def_iter;
2123 insn_info *limit;
2124
2125 // Array of register uses from the candidate insn which occur in MEMs.
2126 use_array cand_addr_uses;
2127
2128 def_walker (def_info *def, insn_info *limit, use_array addr_uses) :
2129 def_iter (def), limit (limit), cand_addr_uses (addr_uses) {}
2130
2131 virtual bool iter_valid () const { return *def_iter; }
2132
2133 // Implemented in {load,store}_walker.
2134 virtual bool alias_conflict_p (int &budget) const = 0;
2135
2136 // Return true if the current (walking) INSN () uses a register R inside a
2137 // MEM, where R is also used inside a MEM by the (static) candidate insn, and
2138 // those uses see different definitions of that register. In this case we
2139 // can't rely on RTL alias analysis, and for now we conservatively assume that
2140 // there is an alias conflict. See PR116783.
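  //
  // A hypothetical sketch of the situation: the candidate insn reads [x1]
  // under one definition of x1, while the insn we are walking over accesses
  // [x1] after x1 has been redefined.  Comparing the two MEM addresses
  // syntactically then tells us nothing, so we give up and assume a conflict.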
2141 bool addr_reg_conflict_p () const
2142 {
2143 use_array curr_insn_uses = insn ()->uses ();
2144 auto cand_use_iter = cand_addr_uses.begin ();
2145 auto insn_use_iter = curr_insn_uses.begin ();
2146 while (cand_use_iter != cand_addr_uses.end ()
2147 && insn_use_iter != curr_insn_uses.end ())
2148 {
2149 auto insn_use = *insn_use_iter;
2150 auto cand_use = *cand_use_iter;
2151 if (insn_use->regno () > cand_use->regno ())
2152 cand_use_iter++;
2153 else if (insn_use->regno () < cand_use->regno ())
2154 insn_use_iter++;
2155 else
2156 {
2157 // As it stands I believe the alias code (memory_modified_in_insn_p)
2158 // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should
2159 // be safe to skip over uses that only occur in notes.
2160 if (insn_use->includes_address_uses ()
2161 && !insn_use->only_occurs_in_notes ()
2162 && insn_use->def () != cand_use->def ())
2163 {
2164 if (dump_file)
2165 {
2166 fprintf (stream: dump_file,
2167 format: "assuming aliasing of cand i%d and i%d:\n"
2168 "-> insns see different defs of common addr reg r%u\n"
2169 "-> ",
2170 cand_use->insn ()->uid (), insn_use->insn ()->uid (),
2171 insn_use->regno ());
2172
2173 // Note that while the following sequence could be made more
2174 // concise by eliding pp_string calls into the pp_printf
2175 // calls, doing so triggers -Wformat-diag.
2176 pretty_printer pp;
2177 pp_string (&pp, "[");
2178 pp_access (&pp, cand_use, flags: 0);
2179 pp_string (&pp, "] in ");
2180 pp_printf (&pp, "i%d", cand_use->insn ()->uid ());
2181 pp_string (&pp, " vs [");
2182 pp_access (&pp, insn_use, flags: 0);
2183 pp_string (&pp, "] in ");
2184 pp_printf (&pp, "i%d", insn_use->insn ()->uid ());
2185 fprintf (stream: dump_file, format: "%s\n", pp_formatted_text (&pp));
2186 }
2187 return true;
2188 }
2189
2190 cand_use_iter++;
2191 insn_use_iter++;
2192 }
2193 }
2194
2195 return false;
2196 }
2197
2198public:
2199 insn_info *insn () const override { return (*def_iter)->insn (); }
2200 void advance () override { def_iter++; }
2201 bool valid () const override final
2202 {
2203 if (!iter_valid ())
2204 return false;
2205
2206 if (reverse)
2207 return *(insn ()) > *limit;
2208 else
2209 return *(insn ()) < *limit;
2210 }
2211
2212 bool conflict_p (int &budget) const override final
2213 {
2214 if (addr_reg_conflict_p ())
2215 return true;
2216
2217 return alias_conflict_p (budget);
2218 }
2219};
2220
2221// alias_walker that iterates over stores.
2222template<bool reverse, typename InsnPredicate>
2223class store_walker : public def_walker<reverse>
2224{
2225 rtx cand_mem;
2226 InsnPredicate tombstone_p;
2227
2228public:
2229 store_walker (def_info *mem_def, rtx mem,
2230 use_array addr_uses,
2231 insn_info *limit_insn,
2232 InsnPredicate tombstone_fn) :
2233 def_walker<reverse> (mem_def, limit_insn, addr_uses),
2234 cand_mem (mem), tombstone_p (tombstone_fn) {}
2235
2236 bool alias_conflict_p (int &budget) const override final
2237 {
2238 if (tombstone_p (this->insn ()))
2239 return false;
2240
2241 return store_modifies_mem_p (cand_mem, this->insn (), budget);
2242 }
2243};
2244
2245// alias_walker that iterates over loads.
2246template<bool reverse>
2247class load_walker : public def_walker<reverse>
2248{
2249 using Base = def_walker<reverse>;
2250 using use_iter_t = typename std::conditional<reverse,
2251 reverse_use_iterator, nondebug_insn_use_iterator>::type;
2252
2253 use_iter_t use_iter;
2254 insn_info *cand_store;
2255
2256 bool iter_valid () const override final { return *use_iter; }
2257
2258public:
2259 void advance () override final
2260 {
2261 use_iter++;
2262 if (*use_iter)
2263 return;
2264 this->def_iter++;
2265 use_iter = Base::start_use_chain (this->def_iter);
2266 }
2267
2268 insn_info *insn () const override final
2269 {
2270 return (*use_iter)->insn ();
2271 }
2272
2273 bool alias_conflict_p (int &budget) const override final
2274 {
2275 return load_modified_by_store_p (insn (), cand_store, budget);
2276 }
2277
2278 load_walker (def_info *def, insn_info *store, use_array addr_uses,
2279 insn_info *limit_insn)
2280 : Base (def, limit_insn, addr_uses),
2281 use_iter (Base::start_use_chain (this->def_iter)),
2282 cand_store (store) {}
2283};
2284
2285// Process our alias_walkers in a round-robin fashion, proceeding until
2286// nothing more can be learned from alias analysis.
2287//
2288// We try to maintain the invariant that if a walker becomes invalid, we
2289// set its pointer to null.
2290void
2291pair_fusion::do_alias_analysis (insn_info *alias_hazards[4],
2292 alias_walker *walkers[4],
2293 bool load_p)
2294{
2295 const int n_walkers = 2 + (2 * !load_p);
2296 int budget = pair_mem_alias_check_limit ();
2297
2298 auto next_walker = [walkers,n_walkers](int current) -> int {
2299 for (int j = 1; j <= n_walkers; j++)
2300 {
2301 int idx = (current + j) % n_walkers;
2302 if (walkers[idx])
2303 return idx;
2304 }
2305 return -1;
2306 };
2307
2308 int i = -1;
2309 for (int j = 0; j < n_walkers; j++)
2310 {
2311 alias_hazards[j] = nullptr;
2312 if (!walkers[j])
2313 continue;
2314
2315 if (!walkers[j]->valid ())
2316 walkers[j] = nullptr;
2317 else if (i == -1)
2318 i = j;
2319 }
2320
2321 while (i >= 0)
2322 {
2323 int insn_i = i % 2;
2324 int paired_i = (i & 2) + !insn_i;
2325 int pair_fst = (i & 2);
2326 int pair_snd = (i & 2) + 1;
2327
2328 if (walkers[i]->conflict_p (budget))
2329 {
2330 alias_hazards[i] = walkers[i]->insn ();
2331
2332 // We got an aliasing conflict for this {load,store} walker,
2333 // so we don't need to walk any further.
2334 walkers[i] = nullptr;
2335
2336 // If we have a pair of alias conflicts that prevent
2337 // forming the pair, stop. There's no need to do further
2338 // analysis.
2339 if (alias_hazards[paired_i]
2340 && (*alias_hazards[pair_fst] <= *alias_hazards[pair_snd]))
2341 return;
2342
2343 if (!load_p)
2344 {
2345 int other_pair_fst = (pair_fst ? 0 : 2);
2346 int other_paired_i = other_pair_fst + !insn_i;
2347
2348 int x_pair_fst = (i == pair_fst) ? i : other_paired_i;
2349 int x_pair_snd = (i == pair_fst) ? other_paired_i : i;
2350
2351 // Similarly, handle the case where we have a {load,store}
2352 // or {store,load} alias hazard pair that prevents forming
2353 // the pair.
2354 if (alias_hazards[other_paired_i]
2355 && *alias_hazards[x_pair_fst] <= *alias_hazards[x_pair_snd])
2356 return;
2357 }
2358 }
2359
2360 if (walkers[i])
2361 {
2362 walkers[i]->advance ();
2363
2364 if (!walkers[i]->valid ())
2365 walkers[i] = nullptr;
2366 }
2367
2368 i = next_walker (i);
2369 }
2370}
2371
2372// Given INSNS (in program order) which are known to be adjacent, look
2373// to see if either insn has a suitable RTL (register) base that we can
2374// use to form a pair. Push these to BASE_CANDS if we find any. CAND_MEMs
2375// gives the relevant mems from the candidate insns, ACCESS_SIZE gives the
2376// size of a single candidate access, and REVERSED says whether the accesses
2377// are inverted in offset order.
2378//
2379// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
2380// addressing.
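//
// As an illustrative example of the offset check below: with ACCESS_SIZE == 8
// and accesses at [base, #16] and [base, #24], the lower access yields
// 16/8 == 2 and the upper access yields 24/8 - 1 == 2, so both describe the
// same pair offset, which is then validated with pair_mem_in_range_p.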
2381int
2382pair_fusion::get_viable_bases (insn_info *insns[2],
2383 vec<base_cand> &base_cands,
2384 rtx cand_mems[2],
2385 unsigned access_size,
2386 bool reversed)
2387{
2388 // We discovered this pair through a common base. Need to ensure that
2389 // we have a common base register that is live at both locations.
2390 def_info *base_defs[2] = {};
2391 int writeback = 0;
2392 for (int i = 0; i < 2; i++)
2393 {
2394 const bool is_lower = (i == reversed);
2395 poly_int64 poly_off;
2396 rtx base = pair_mem_strip_offset (mem: cand_mems[i], offset: &poly_off);
2397 if (GET_RTX_CLASS (GET_CODE (XEXP (cand_mems[i], 0))) == RTX_AUTOINC)
2398 writeback |= (1 << i);
2399
2400 if (!REG_P (base) || !poly_off.is_constant ())
2401 continue;
2402
2403 // Punt on accesses relative to eliminable regs. See the comment in
2404 // pair_fusion_bb_info::track_access for a detailed explanation of this.
2405 if (!reload_completed
2406 && (REGNO (base) == FRAME_POINTER_REGNUM
2407 || REGNO (base) == ARG_POINTER_REGNUM))
2408 continue;
2409
2410 HOST_WIDE_INT base_off = poly_off.to_constant ();
2411
2412 // It should be unlikely that we ever punt here, since MEM_EXPR offset
2413 // alignment should be a good proxy for register offset alignment.
2414 if (base_off % access_size != 0)
2415 {
2416 if (dump_file)
2417 fprintf (stream: dump_file,
2418 format: "base not viable, offset misaligned (insn %d)\n",
2419 insns[i]->uid ());
2420 continue;
2421 }
2422
2423 base_off /= access_size;
2424
2425 if (!is_lower)
2426 base_off--;
2427
2428 if (!pair_mem_in_range_p (offset: base_off))
2429 continue;
2430
2431 use_info *use = find_access (accesses: insns[i]->uses (), REGNO (base));
2432 gcc_assert (use);
2433 base_defs[i] = use->def ();
2434 }
2435
2436 if (!base_defs[0] && !base_defs[1])
2437 {
2438 if (dump_file)
2439 fprintf (stream: dump_file, format: "no viable base register for pair (%d,%d)\n",
2440 insns[0]->uid (), insns[1]->uid ());
2441 return writeback;
2442 }
2443
2444 for (int i = 0; i < 2; i++)
2445 if ((writeback & (1 << i)) && !base_defs[i])
2446 {
2447 if (dump_file)
2448 fprintf (stream: dump_file, format: "insn %d has writeback but base isn't viable\n",
2449 insns[i]->uid ());
2450 return writeback;
2451 }
2452
2453 if (writeback == 3
2454 && base_defs[0]->regno () != base_defs[1]->regno ())
2455 {
2456 if (dump_file)
2457 fprintf (stream: dump_file,
2458 format: "pair (%d,%d): double writeback with distinct regs (%d,%d): "
2459 "punting\n",
2460 insns[0]->uid (), insns[1]->uid (),
2461 base_defs[0]->regno (), base_defs[1]->regno ());
2462 return writeback;
2463 }
2464
2465 if (base_defs[0] && base_defs[1]
2466 && base_defs[0]->regno () == base_defs[1]->regno ())
2467 {
2468 // Easy case: insns already share the same base reg.
2469 base_cands.quick_push (obj: base_defs[0]);
2470 return writeback;
2471 }
2472
2473 // Otherwise, we know that one of the bases must change.
2474 //
2475 // Note that if there is writeback we must use the writeback base
2476 // (we know now there is exactly one).
2477 for (int i = 0; i < 2; i++)
2478 if (base_defs[i] && (!writeback || (writeback & (1 << i))))
2479 base_cands.quick_push (obj: base_cand { base_defs[i], i });
2480
2481 return writeback;
2482}
2483
2484// Given two adjacent memory accesses of the same size, I1 and I2, try
2485// and see if we can merge them into a paired access.
2486//
2487// ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
2488// if the accesses are both loads, otherwise they are both stores.
2489bool
2490pair_fusion_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
2491 insn_info *i1, insn_info *i2)
2492{
2493 if (dump_file)
2494 fprintf (stream: dump_file, format: "analyzing pair (load=%d): (%d,%d)\n",
2495 load_p, i1->uid (), i2->uid ());
2496
2497 insn_info *insns[2];
2498 bool reversed = false;
2499 if (*i1 < *i2)
2500 {
2501 insns[0] = i1;
2502 insns[1] = i2;
2503 }
2504 else
2505 {
2506 insns[0] = i2;
2507 insns[1] = i1;
2508 reversed = true;
2509 }
2510
2511 rtx cand_mems[2];
2512 rtx reg_ops[2];
2513 rtx pats[2];
2514 for (int i = 0; i < 2; i++)
2515 {
2516 pats[i] = PATTERN (insn: insns[i]->rtl ());
2517 cand_mems[i] = XEXP (pats[i], load_p);
2518 reg_ops[i] = XEXP (pats[i], !load_p);
2519 }
2520
2521 if (load_p && reg_overlap_mentioned_p (reg_ops[0], reg_ops[1]))
2522 {
2523 if (dump_file)
2524 fprintf (stream: dump_file,
2525 format: "punting on load pair due to reg conflicts (%d,%d)\n",
2526 insns[0]->uid (), insns[1]->uid ());
2527 return false;
2528 }
2529
2530 if (cfun->can_throw_non_call_exceptions
2531 && find_reg_note (insns[0]->rtl (), REG_EH_REGION, NULL_RTX)
2532 && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
2533 {
2534 if (dump_file)
2535 fprintf (stream: dump_file,
2536 format: "can't combine insns with EH side effects (%d,%d)\n",
2537 insns[0]->uid (), insns[1]->uid ());
2538 return false;
2539 }
2540
2541 auto_vec<base_cand, 2> base_cands (2);
2542
2543 int writeback = m_pass->get_viable_bases (insns, base_cands, cand_mems,
2544 access_size, reversed);
2545 if (base_cands.is_empty ())
2546 {
2547 if (dump_file)
2548 fprintf (stream: dump_file, format: "no viable base for pair (%d,%d)\n",
2549 insns[0]->uid (), insns[1]->uid ());
2550 return false;
2551 }
2552
2553 // Punt on frame-related insns with writeback. We probably won't see
2554 // these in practice, but this is conservative and ensures we don't
2555 // have to worry about these later on.
2556 if (writeback && (RTX_FRAME_RELATED_P (i1->rtl ())
2557 || RTX_FRAME_RELATED_P (i2->rtl ())))
2558 {
2559 if (dump_file)
2560 fprintf (stream: dump_file,
2561 format: "rejecting pair (%d,%d): frame-related insn with writeback\n",
2562 i1->uid (), i2->uid ());
2563 return false;
2564 }
2565
2566 rtx *ignore = &XEXP (pats[1], load_p);
2567 for (auto use : insns[1]->uses ())
2568 if (!use->is_mem ()
2569 && refers_to_regno_p (use->regno (), use->regno () + 1, pats[1], ignore)
2570 && use->def () && use->def ()->insn () == insns[0])
2571 {
2572 // N.B. we allow a true dependence on the base address, as this
2573 // happens in the case of auto-inc accesses. Consider a post-increment
2574 // load followed by a regular indexed load, for example.
2575 if (dump_file)
2576 fprintf (stream: dump_file,
2577 format: "%d has non-address true dependence on %d, rejecting pair\n",
2578 insns[1]->uid (), insns[0]->uid ());
2579 return false;
2580 }
2581
2582 unsigned i = 0;
2583 while (i < base_cands.length ())
2584 {
2585 base_cand &cand = base_cands[i];
2586
2587 rtx *ignore[2] = {};
2588 for (int j = 0; j < 2; j++)
2589 if (cand.from_insn == !j)
2590 ignore[j] = &XEXP (cand_mems[j], 0);
2591
2592 insn_info *h = first_hazard_after (insn: insns[0], ignore: ignore[0]);
2593 if (h && *h < *insns[1])
2594 cand.hazards[0] = h;
2595
2596 h = latest_hazard_before (insn: insns[1], ignore: ignore[1]);
2597 if (h && *h > *insns[0])
2598 cand.hazards[1] = h;
2599
2600 if (!cand.viable ())
2601 {
2602 if (dump_file)
2603 fprintf (stream: dump_file,
2604 format: "pair (%d,%d): rejecting base %d due to dataflow "
2605 "hazards (%d,%d)\n",
2606 insns[0]->uid (),
2607 insns[1]->uid (),
2608 cand.def->regno (),
2609 cand.hazards[0]->uid (),
2610 cand.hazards[1]->uid ());
2611
2612 base_cands.ordered_remove (ix: i);
2613 }
2614 else
2615 i++;
2616 }
2617
2618 if (base_cands.is_empty ())
2619 {
2620 if (dump_file)
2621 fprintf (stream: dump_file,
2622 format: "can't form pair (%d,%d) due to dataflow hazards\n",
2623 insns[0]->uid (), insns[1]->uid ());
2624 return false;
2625 }
2626
2627 insn_info *alias_hazards[4] = {};
2628
2629 // First def of memory after the first insn, and last def of memory
2630 // before the second insn, respectively.
2631 def_info *mem_defs[2] = {};
2632 if (load_p)
2633 {
2634 if (!MEM_READONLY_P (cand_mems[0]))
2635 {
2636 mem_defs[0] = memory_access (accesses: insns[0]->uses ())->def ();
2637 gcc_checking_assert (mem_defs[0]);
2638 mem_defs[0] = mem_defs[0]->next_def ();
2639 }
2640 if (!MEM_READONLY_P (cand_mems[1]))
2641 {
2642 mem_defs[1] = memory_access (accesses: insns[1]->uses ())->def ();
2643 gcc_checking_assert (mem_defs[1]);
2644 }
2645 }
2646 else
2647 {
2648 mem_defs[0] = memory_access (accesses: insns[0]->defs ())->next_def ();
2649 mem_defs[1] = memory_access (accesses: insns[1]->defs ())->prev_def ();
2650 gcc_checking_assert (mem_defs[0]);
2651 gcc_checking_assert (mem_defs[1]);
2652 }
2653
2654 auto tombstone_p = [&](insn_info *insn) -> bool {
2655 return m_emitted_tombstone
2656 && bitmap_bit_p (&m_tombstone_bitmap, insn->uid ());
2657 };
2658
2659 auto_vec<access_info *, 2> addr_use_vec[2];
2660 use_array addr_uses[2];
2661
2662 // Collect the lists of register uses that occur in the candidate MEMs.
2663 for (int i = 0; i < 2; i++)
2664 {
2665 // N.B. it's safe for us to ignore uses that only occur in notes
2666 // here (e.g. in a REG_EQUIV expression) since we only pass the
2667 // MEM down to the alias machinery, so it can't see any insn-level
2668 // notes.
2669 for (auto use : insns[i]->uses ())
2670 if (use->is_reg ()
2671 && use->includes_address_uses ()
2672 && !use->only_occurs_in_notes ())
2673 addr_use_vec[i].safe_push (obj: use);
2674
2675 addr_uses[i] = use_array (addr_use_vec[i]);
2676 }
2677
2678 store_walker<false, decltype(tombstone_p)>
2679 forward_store_walker (mem_defs[0], cand_mems[0], addr_uses[0], insns[1],
2680 tombstone_p);
2681
2682 store_walker<true, decltype(tombstone_p)>
2683 backward_store_walker (mem_defs[1], cand_mems[1], addr_uses[1], insns[0],
2684 tombstone_p);
2685
2686 alias_walker *walkers[4] = {};
2687 if (mem_defs[0])
2688 walkers[0] = &forward_store_walker;
2689 if (mem_defs[1])
2690 walkers[1] = &backward_store_walker;
2691
2692 if (load_p && (mem_defs[0] || mem_defs[1]))
2693 m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
2694 else
2695 {
2696 // We want to find any loads hanging off the first store.
2697 mem_defs[0] = memory_access (accesses: insns[0]->defs ());
2698 load_walker<false> forward_load_walker (mem_defs[0], insns[0],
2699 addr_uses[0], insns[1]);
2700 load_walker<true> backward_load_walker (mem_defs[1], insns[1],
2701 addr_uses[1], insns[0]);
2702 walkers[2] = &forward_load_walker;
2703 walkers[3] = &backward_load_walker;
2704 m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
2705 // Now consolidate hazards back down.
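      // That is, keep the earliest hazard in the forward direction and the
      // latest hazard in the backward direction, whichever walker found it.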
2706 if (alias_hazards[2]
2707 && (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
2708 alias_hazards[0] = alias_hazards[2];
2709
2710 if (alias_hazards[3]
2711 && (!alias_hazards[1] || (*alias_hazards[3] > *alias_hazards[1])))
2712 alias_hazards[1] = alias_hazards[3];
2713 }
2714
2715 if (alias_hazards[0] && alias_hazards[1]
2716 && *alias_hazards[0] <= *alias_hazards[1])
2717 {
2718 if (dump_file)
2719 fprintf (stream: dump_file,
2720 format: "cannot form pair (%d,%d) due to alias conflicts (%d,%d)\n",
2721 i1->uid (), i2->uid (),
2722 alias_hazards[0]->uid (), alias_hazards[1]->uid ());
2723 return false;
2724 }
2725
2726 // Now narrow the hazards on each base candidate using
2727 // the alias hazards.
2728 i = 0;
2729 while (i < base_cands.length ())
2730 {
2731 base_cand &cand = base_cands[i];
2732 if (alias_hazards[0] && (!cand.hazards[0]
2733 || *alias_hazards[0] < *cand.hazards[0]))
2734 cand.hazards[0] = alias_hazards[0];
2735 if (alias_hazards[1] && (!cand.hazards[1]
2736 || *alias_hazards[1] > *cand.hazards[1]))
2737 cand.hazards[1] = alias_hazards[1];
2738
2739 if (cand.viable ())
2740 i++;
2741 else
2742 {
2743 if (dump_file)
2744 fprintf (stream: dump_file, format: "pair (%d,%d): rejecting base %d due to "
2745 "alias/dataflow hazards (%d,%d)",
2746 insns[0]->uid (), insns[1]->uid (),
2747 cand.def->regno (),
2748 cand.hazards[0]->uid (),
2749 cand.hazards[1]->uid ());
2750
2751 base_cands.ordered_remove (ix: i);
2752 }
2753 }
2754
2755 if (base_cands.is_empty ())
2756 {
2757 if (dump_file)
2758 fprintf (stream: dump_file,
2759 format: "cannot form pair (%d,%d) due to alias/dataflow hazards\n",
2760 insns[0]->uid (), insns[1]->uid ());
2761
2762 return false;
2763 }
2764
2765 base_cand *base = &base_cands[0];
2766 if (base_cands.length () > 1)
2767 {
2768 // If there are still multiple viable bases, it makes sense
2769 // to choose one that allows us to reduce register pressure:
2770 // for loads this means moving further down, for stores this
2771 // means moving further up.
2772 gcc_checking_assert (base_cands.length () == 2);
2773 const int hazard_i = !load_p;
2774 if (base->hazards[hazard_i])
2775 {
2776 if (!base_cands[1].hazards[hazard_i])
2777 base = &base_cands[1];
2778 else if (load_p
2779 && *base_cands[1].hazards[hazard_i]
2780 > *(base->hazards[hazard_i]))
2781 base = &base_cands[1];
2782 else if (!load_p
2783 && *base_cands[1].hazards[hazard_i]
2784 < *(base->hazards[hazard_i]))
2785 base = &base_cands[1];
2786 }
2787 }
2788
2789 // Otherwise, hazards[0] > hazards[1].
2790 // Pair can be formed anywhere in (hazards[1], hazards[0]).
2791 insn_range_info range (insns[0], insns[1]);
2792 if (base->hazards[1])
2793 range.first = base->hazards[1];
2794 if (base->hazards[0])
2795 range.last = base->hazards[0]->prev_nondebug_insn ();
2796
2797 // If the second insn can throw, narrow the move range to exactly that insn.
2798 // This prevents us trying to move the second insn from the end of the BB.
2799 if (cfun->can_throw_non_call_exceptions
2800 && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
2801 {
2802 gcc_assert (range.includes (insns[1]));
2803 range = insn_range_info (insns[1]);
2804 }
2805
2806 // Placement strategy: push loads down and pull stores up; this should
2807 // help register pressure by reducing live ranges.
2808 if (load_p)
2809 range.first = range.last;
2810 else
2811 range.last = range.first;
2812
2813 if (dump_file)
2814 {
2815 auto print_hazard = [](insn_info *i)
2816 {
2817 if (i)
2818 fprintf (stream: dump_file, format: "%d", i->uid ());
2819 else
2820 fprintf (stream: dump_file, format: "-");
2821 };
2822 auto print_pair = [print_hazard](insn_info **i)
2823 {
2824 print_hazard (i[0]);
2825 fprintf (stream: dump_file, format: ",");
2826 print_hazard (i[1]);
2827 };
2828
2829 fprintf (stream: dump_file, format: "fusing pair [L=%d] (%d,%d), base=%d, hazards: (",
2830 load_p, insns[0]->uid (), insns[1]->uid (),
2831 base->def->regno ());
2832 print_pair (base->hazards);
2833 fprintf (stream: dump_file, format: "), move_range: (%d,%d)\n",
2834 range.first->uid (), range.last->uid ());
2835 }
2836
2837 return fuse_pair (load_p, access_size, writeback,
2838 i1, i2, base&: *base, move_range: range);
2839}
2840
2841static void
2842dump_insn_list (FILE *f, const insn_list_t &l)
2843{
2844 fprintf (stream: f, format: "(");
2845
2846 auto i = l.begin ();
2847 auto end = l.end ();
2848
2849 if (i != end)
2850     fprintf (stream: f, format: "%d", (*i++)->uid ());
2852
2853 for (; i != end; i++)
2854 fprintf (stream: f, format: ", %d", (*i)->uid ());
2855
2856 fprintf (stream: f, format: ")");
2857}
2858
2859DEBUG_FUNCTION void
2860debug (const insn_list_t &l)
2861{
2862 dump_insn_list (stderr, l);
2863 fprintf (stderr, format: "\n");
2864}
2865
2866// LEFT_LIST and RIGHT_LIST are lists of candidate instructions where all insns
2867// in LEFT_LIST are known to be adjacent to those in RIGHT_LIST.
2868//
2869// This function traverses the resulting 2D matrix of possible pair candidates
2870// and attempts to merge them into pairs.
2871//
2872// The algorithm is straightforward: if we consider a combined list of
2873// candidates X obtained by merging LEFT_LIST and RIGHT_LIST in program order,
2874// then we advance through X until we reach a crossing point (where X[i] and
2875// X[i+1] come from different source lists).
2876//
2877// At this point we know X[i] and X[i+1] are adjacent accesses, and we try to
2878// fuse them into a pair. If this succeeds, we remove X[i] and X[i+1] from
2879// their original lists and continue as above.
2880//
2881// In the failure case, we advance through the source list containing X[i] and
2882// continue as above (proceeding to the next crossing point).
2883//
2884// The rationale for skipping over groups of consecutive candidates from the
2885// same source list is as follows:
2886//
2887// In the store case, the insns in the group can't be re-ordered over each
2888// other as they are guaranteed to store to the same location, so we're
2889// guaranteed not to lose opportunities by doing this.
2890//
2891// In the load case, subsequent loads from the same location are either
2892// redundant (in which case they should have been cleaned up by an earlier
2893// optimization pass) or there is an intervening aliasing hazard, in which case
2894// we can't re-order them anyway, so provided earlier passes have cleaned up
2895// redundant loads, we shouldn't miss opportunities by doing this.
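//
// For example (illustrative): with LEFT_LIST = (i1, i3) and
// RIGHT_LIST = (i2, i4) in program order, the merged order is i1, i2, i3, i4
// and the first crossing point is (i1, i2).  If fusing i1 and i2 succeeds,
// both are removed and the next attempt is (i3, i4); if it fails, we advance
// past i1 and the next crossing point considered is (i2, i3).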
2896void
2897pair_fusion_bb_info::merge_pairs (insn_list_t &left_list,
2898 insn_list_t &right_list,
2899 bool load_p,
2900 unsigned access_size)
2901{
2902 if (dump_file)
2903 {
2904 fprintf (stream: dump_file, format: "merge_pairs [L=%d], cand vecs ", load_p);
2905 dump_insn_list (f: dump_file, l: left_list);
2906 fprintf (stream: dump_file, format: " x ");
2907 dump_insn_list (f: dump_file, l: right_list);
2908 fprintf (stream: dump_file, format: "\n");
2909 }
2910
2911 auto iter_l = left_list.begin ();
2912 auto iter_r = right_list.begin ();
2913
2914 while (iter_l != left_list.end () && iter_r != right_list.end ())
2915 {
2916 auto next_l = std::next (x: iter_l);
2917 auto next_r = std::next (x: iter_r);
2918 if (**iter_l < **iter_r
2919 && next_l != left_list.end ()
2920 && **next_l < **iter_r)
2921 iter_l = next_l;
2922 else if (**iter_r < **iter_l
2923 && next_r != right_list.end ()
2924 && **next_r < **iter_l)
2925 iter_r = next_r;
2926 else if (try_fuse_pair (load_p, access_size, i1: *iter_l, i2: *iter_r))
2927 {
2928 left_list.erase (position: iter_l);
2929 iter_l = next_l;
2930 right_list.erase (position: iter_r);
2931 iter_r = next_r;
2932 }
2933 else if (**iter_l < **iter_r)
2934 iter_l = next_l;
2935 else
2936 iter_r = next_r;
2937 }
2938}
2939
2940// Iterate over the accesses in GROUP, looking for adjacent sets
2941// of accesses. If we find two sets of adjacent accesses, call
2942// merge_pairs.
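//
// For example (illustrative): if a group contains candidates at offsets
// 0, 8 and 16 with an access size of 8, we call merge_pairs for the
// candidates at offsets (0, 8) and then, provided the offset-8 candidates
// were not all consumed by the first call, for those at (8, 16).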
2943void
2944pair_fusion_bb_info::transform_for_base (int encoded_lfs,
2945 access_group &group)
2946{
2947 const auto lfs = decode_lfs (lfs: encoded_lfs);
2948 const unsigned access_size = lfs.size;
2949
2950 bool skip_next = true;
2951 access_record *prev_access = nullptr;
2952
2953 for (auto &access : group.list)
2954 {
2955 if (skip_next)
2956 skip_next = false;
2957 else if (known_eq (access.offset, prev_access->offset + access_size))
2958 {
2959 merge_pairs (left_list&: prev_access->cand_insns,
2960 right_list&: access.cand_insns,
2961 load_p: lfs.load_p,
2962 access_size);
2963 skip_next = access.cand_insns.empty ();
2964 }
2965 prev_access = &access;
2966 }
2967}
2968
2969// If we emitted tombstone insns for this BB, iterate through the BB
2970// and remove all the tombstone insns, being sure to reparent any uses
2971// of mem to previous defs when we do this.
2972void
2973pair_fusion_bb_info::cleanup_tombstones ()
2974{
2975 // No need to do anything if we didn't emit a tombstone insn for this BB.
2976 if (!m_emitted_tombstone)
2977 return;
2978
2979 for (auto insn : iterate_safely (range: m_bb->nondebug_insns ()))
2980 {
2981 if (!insn->is_real ()
2982 || !bitmap_bit_p (&m_tombstone_bitmap, insn->uid ()))
2983 continue;
2984
2985 auto set = as_a<set_info *> (p: memory_access (accesses: insn->defs ()));
2986 if (set->has_any_uses ())
2987 {
2988 auto prev_set = as_a<set_info *> (p: set->prev_def ());
2989 while (set->first_use ())
2990 crtl->ssa->reparent_use (use: set->first_use (), new_def: prev_set);
2991 }
2992
2993 // Now set has no uses, we can delete it.
2994 insn_change change (insn, insn_change::DELETE);
2995 crtl->ssa->change_insn (change);
2996 }
2997}
2998
2999template<typename Map>
3000void
3001pair_fusion_bb_info::traverse_base_map (Map &map)
3002{
3003 for (auto kv : map)
3004 {
3005 const auto &key = kv.first;
3006 auto &value = kv.second;
3007 transform_for_base (encoded_lfs: key.second, group&: value);
3008 }
3009}
3010
3011void
3012pair_fusion_bb_info::transform ()
3013{
3014 traverse_base_map (map&: expr_map);
3015 traverse_base_map (map&: def_map);
3016}
3017
3018// Given an existing pair insn INSN, look for a trailing update of
3019// the base register which we can fold in to make this pair use
3020// a writeback addressing mode.
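//
// A minimal illustration (assuming an aarch64-like target):
//
//   ldp  x0, x1, [x2]
//   add  x2, x2, #16
//   -->
//   ldp  x0, x1, [x2], #16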
3021void
3022pair_fusion::try_promote_writeback (insn_info *insn, bool load_p)
3023{
3024 rtx regs[2];
3025
3026 rtx mem = destructure_pair (regs, pattern: PATTERN (insn: insn->rtl ()), load_p);
3027 gcc_checking_assert (MEM_P (mem));
3028
3029 poly_int64 offset;
3030 rtx base = strip_offset (XEXP (mem, 0), &offset);
3031 gcc_assert (REG_P (base));
3032
3033 const auto access_size = GET_MODE_SIZE (GET_MODE (mem)).to_constant () / 2;
3034
3035 if (find_access (accesses: insn->defs (), REGNO (base)))
3036 {
3037 gcc_assert (load_p);
3038 if (dump_file)
3039 fprintf (stream: dump_file,
3040 format: "ldp %d clobbers base r%d, can't promote to writeback\n",
3041 insn->uid (), REGNO (base));
3042 return;
3043 }
3044
3045 auto base_use = find_access (accesses: insn->uses (), REGNO (base));
3046 gcc_assert (base_use);
3047
3048 if (!base_use->def ())
3049 {
3050 if (dump_file)
3051 fprintf (stream: dump_file,
3052 format: "found pair (i%d, L=%d): but base r%d is upwards exposed\n",
3053 insn->uid (), load_p, REGNO (base));
3054 return;
3055 }
3056
3057 auto base_def = base_use->def ();
3058
3059 rtx wb_effect = NULL_RTX;
3060 def_info *add_def;
3061 const insn_range_info pair_range (insn);
3062 insn_info *insns[2] = { nullptr, insn };
3063 insn_info *trailing_add
3064 = find_trailing_add (insns, pair_range, initial_writeback: 0, writeback_effect: &wb_effect,
3065 add_def: &add_def, base_def, initial_offset: offset,
3066 access_size);
3067 if (!trailing_add)
3068 return;
3069
3070 auto attempt = crtl->ssa->new_change_attempt ();
3071
3072 insn_change pair_change (insn);
3073 insn_change del_change (trailing_add, insn_change::DELETE);
3074 insn_change *changes[] = { &pair_change, &del_change };
3075
3076 rtx pair_pat = gen_promote_writeback_pair (wb_effect, mem, regs, load_p);
3077 validate_unshare_change (insn->rtl (), &PATTERN (insn: insn->rtl ()), pair_pat,
3078 true);
3079
3080 // The pair must gain the def of the base register from the add.
3081 pair_change.new_defs = insert_access (watermark&: attempt,
3082 access1: add_def,
3083 accesses2: pair_change.new_defs);
3084 gcc_assert (pair_change.new_defs.is_valid ());
3085
3086 auto ignore = ignore_changing_insns (changes);
3087 for (unsigned i = 0; i < ARRAY_SIZE (changes); i++)
3088 gcc_assert (changes[i]->move_range.singleton ()
3089 && rtl_ssa::restrict_movement (*changes[i], ignore));
3090
3091 if (!rtl_ssa::recog (watermark&: attempt, change&: pair_change, ignore))
3092 {
3093 if (dump_file)
3094 fprintf (stream: dump_file, format: "i%d: recog failed on wb pair, bailing out\n",
3095 insn->uid ());
3096 cancel_changes (0);
3097 return;
3098 }
3099
3100 gcc_assert (crtl->ssa->verify_insn_changes (changes));
3101
3102 if (MAY_HAVE_DEBUG_INSNS)
3103 fixup_debug_uses_trailing_add (attempt, pair_dst: insn, trailing_add, writeback_effect: wb_effect);
3104
3105 confirm_change_group ();
3106 crtl->ssa->change_insns (changes);
3107}
3108
3109// Main function for the pass. Iterate over the insns in BB looking
3110// for load/store candidates. If running after RA, also try and promote
3111// non-writeback pairs to use writeback addressing. Then try to fuse
3112// candidates into pairs.
3113void pair_fusion::process_block (bb_info *bb)
3114{
3115 const bool track_loads = track_loads_p ();
3116 const bool track_stores = track_stores_p ();
3117
3118 pair_fusion_bb_info bb_state (bb, this);
3119
3120 for (auto insn : bb->nondebug_insns ())
3121 {
3122 rtx_insn *rti = insn->rtl ();
3123
3124 if (!rti || !INSN_P (rti))
3125 continue;
3126
3127 rtx pat = PATTERN (insn: rti);
3128 bool load_p;
3129 if (reload_completed
3130 && should_handle_writeback (which: writeback_type::ALL)
3131 && pair_mem_insn_p (insn: rti, load_p))
3132 try_promote_writeback (insn, load_p);
3133
3134 if (GET_CODE (pat) != SET)
3135 continue;
3136
3137 if (track_stores && MEM_P (XEXP (pat, 0)))
3138 bb_state.track_access (insn, load_p: false, XEXP (pat, 0));
3139 else if (track_loads && MEM_P (XEXP (pat, 1)))
3140 bb_state.track_access (insn, load_p: true, XEXP (pat, 1));
3141 }
3142
3143 bb_state.transform ();
3144 bb_state.cleanup_tombstones ();
3145}
3146
