vec-perm-indices.cc source code [gcc/vec-perm-indices.cc]

1	/ A representation of vector permutation indices.*
2	Copyright (C) 2017-2023 Free Software Foundation, Inc.
3
4	This file is part of GCC.
5
6	GCC is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free
8	Software Foundation; either version 3, or (at your option) any later
9	version.
10
11	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12	WARRANTY; without even the implied warranty of MERCHANTABILITY or
13	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14	for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with GCC; see the file COPYING3. If not see
18	<http://www.gnu.org/licenses/>. /*
19
20	#include "config.h"
21	#include "system.h"
22	#include "coretypes.h"
23	#include "vec-perm-indices.h"
24	#include "tree.h"
25	#include "fold-const.h"
26	#include "tree-vector-builder.h"
27	#include "backend.h"
28	#include "rtl.h"
29	#include "memmodel.h"
30	#include "emit-rtl.h"
31	#include "selftest.h"
32	#include "rtx-vector-builder.h"
33
34	/ Switch to a new permutation vector that selects between NINPUTS vector*
35	inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36	new permutation vector from ELEMENTS, clamping each one to be in range. /*
37
38	void
39	vec_perm_indices::new_vector (const vec_perm_builder &elements,
40	unsigned int ninputs,
41	poly_uint64 nelts_per_input)
42	{
43	m_ninputs = ninputs;
44	m_nelts_per_input = nelts_per_input;
45	/ If the vector has a constant number of elements, expand the*
46	encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47	wrap halfway if there is only one vector input, and we want
48	the wrapped form to be the canonical one.
49
50	If the vector has a variable number of elements, just copy
51	the encoding. In that case the unwrapped form is canonical
52	and there is no way of representing the wrapped form. /*
53	poly_uint64 full_nelts = elements.full_nelts ();
54	unsigned HOST_WIDE_INT copy_nelts;
55	if (full_nelts.is_constant (const_value: &copy_nelts))
56	m_encoding.new_vector (full_nelts, npatterns: copy_nelts, nelts_per_pattern: `1`);
57	else
58	{
59	copy_nelts = elements.encoded_nelts ();
60	m_encoding.new_vector (full_nelts, npatterns: elements.npatterns (),
61	nelts_per_pattern: elements.nelts_per_pattern ());
62	}
63	unsigned int npatterns = m_encoding.npatterns ();
64	for (unsigned int i = `0`; i < npatterns; ++i)
65	m_encoding.quick_push (obj: clamp (elt: elements.elt (i)));
66	/ Use the fact that:*
67
68	(a + b) % c == ((a % c) + (b % c)) % c
69
70	to simplify the clamping of variable-length vectors. /*
71	for (unsigned int i = npatterns; i < copy_nelts; ++i)
72	{
73	element_type step = clamp (elt: elements.elt (i)
74	- elements.elt (i: i - npatterns));
75	m_encoding.quick_push (obj: clamp (elt: m_encoding [i - npatterns] + step));
76	}
77	m_encoding.finalize ();
78	}
79
80	/ Switch to a new permutation vector that selects the same input elements*
81	as ORIG, but with each element split into FACTOR pieces. For example,
82	if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83	{ 2, 3, 4, 5, 0, 1, 6, 7 }. /*
84
85	void
86	vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87	unsigned int factor)
88	{
89	m_ninputs = orig.m_ninputs;
90	m_nelts_per_input = orig.m_nelts_per_input * factor;
91	m_encoding.new_vector (full_nelts: orig.m_encoding.full_nelts () * factor,
92	npatterns: orig.m_encoding.npatterns () * factor,
93	nelts_per_pattern: orig.m_encoding.nelts_per_pattern ());
94	unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95	for (unsigned int i = `0`; i < encoded_nelts; ++i)
96	{
97	element_type base = orig.m_encoding [i] * factor;
98	for (unsigned int j = `0`; j < factor; ++j)
99	m_encoding.quick_push (obj: base + j);
100	}
101	m_encoding.finalize ();
102	}
103
104	/ Check whether we can switch to a new permutation vector that*
105	selects the same input elements as ORIG, but with each element
106	built up from FACTOR pieces. Return true if yes, otherwise
107	return false. Every FACTOR permutation indexes should be
108	continuous separately and the first one of each batch should
109	be able to exactly modulo FACTOR. For example, if ORIG is
110	{ 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111	is { 1, 2, 0, 3 }. /*
112
113	bool
114	vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115	unsigned int factor)
116	{
117	gcc_assert (factor > `0`);
118
119	if (maybe_lt (a: orig.m_nelts_per_input, b: factor))
120	return false;
121
122	poly_uint64 nelts;
123	/ Invalid if vector units number isn't multiple of factor. /
124	if (!multiple_p (a: orig.m_nelts_per_input, b: factor, multiple: &nelts))
125	return false;
126
127	/ Only handle the case that npatterns is multiple of factor.*
128	FIXME: Try to see whether we can reshape it by factor npatterns. /*
129	if (orig.m_encoding.npatterns () % factor != `0`)
130	return false;
131
132	unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133	auto_vec<element_type, `32`> encoding (encoded_nelts);
134	/ Separate all encoded elements into batches by size factor,*
135	then ensure the first element of each batch is multiple of
136	factor and all elements in each batch is consecutive from
137	the first one. /*
138	for (unsigned int i = `0`; i < encoded_nelts; i += factor)
139	{
140	element_type first = orig.m_encoding [i];
141	element_type new_index;
142	if (!multiple_p (a: first, b: factor, multiple: &new_index))
143	return false;
144	for (unsigned int j = `1`; j < factor; ++j)
145	if (maybe_ne (a: first + j, b: orig.m_encoding [i + j]))
146	return false;
147	encoding.quick_push (obj: new_index);
148	}
149
150	m_ninputs = orig.m_ninputs;
151	m_nelts_per_input = nelts;
152	poly_uint64 full_nelts = exact_div (a: orig.m_encoding.full_nelts (), b: factor);
153	unsigned int npatterns = orig.m_encoding.npatterns () / factor;
154
155	m_encoding.new_vector (full_nelts, npatterns,
156	nelts_per_pattern: orig.m_encoding.nelts_per_pattern ());
157	m_encoding.splice (src: encoding);
158	m_encoding.finalize ();
159
160	return true;
161	}
162
163	/ Rotate the inputs of the permutation right by DELTA inputs. This changes*
164	the values of the permutation vector but it doesn't change the way that
165	the elements are encoded. /*
166
167	void
168	vec_perm_indices::rotate_inputs (int delta)
169	{
170	element_type element_delta = delta * m_nelts_per_input;
171	for (unsigned int i = `0`; i < m_encoding.length (); ++i)
172	m_encoding [i] = clamp (elt: m_encoding [i] + element_delta);
173	}
174
175	/ Return true if index OUT_BASE + I * OUT_STEP selects input*
176	element IN_BASE + I IN_STEP. For example, the call to test*
177	whether a permute reverses a vector of N elements would be:
178
179	series_p (0, 1, N - 1, -1)
180
181	which would return true for { N - 1, N - 2, N - 3, ... }.
182	The calls to test for an interleaving of elements starting
183	at N1 and N2 would be:
184
185	series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
186
187	which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. /*
188
189	bool
190	vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191	element_type in_base, element_type in_step) const
192	{
193	/ Check the base value. /
194	if (maybe_ne (a: clamp (elt: m_encoding.elt (i: out_base)), b: clamp (elt: in_base)))
195	return false;
196
197	element_type full_nelts = m_encoding.full_nelts ();
198	unsigned int npatterns = m_encoding.npatterns ();
199
200	/ Calculate which multiple of OUT_STEP elements we need to get*
201	back to the same pattern. /*
202	unsigned int cycle_length = least_common_multiple (out_step, npatterns);
203
204	/ Check the steps. /
205	in_step = clamp (elt: in_step);
206	out_base += out_step;
207	unsigned int limit = `0`;
208	for (;;)
209	{
210	/ Succeed if we've checked all the elements in the vector. /
211	if (known_ge (out_base, full_nelts))
212	return true;
213
214	if (out_base >= npatterns)
215	{
216	/ We've got to the end of the "foreground" values. Check*
217	2 elements from each pattern in the "background" values. /*
218	if (limit == `0`)
219	limit = out_base + cycle_length * `2`;
220	else if (out_base >= limit)
221	return true;
222	}
223
224	element_type v0 = m_encoding.elt (i: out_base - out_step);
225	element_type v1 = m_encoding.elt (i: out_base);
226	if (maybe_ne (a: clamp (elt: v1 - v0), b: in_step))
227	return false;
228
229	out_base += out_step;
230	}
231	}
232
233	/ Return true if all elements of the permutation vector are in the range*
234	[START, START + SIZE). /*
235
236	bool
237	vec_perm_indices::all_in_range_p (element_type start, element_type size) const
238	{
239	/ Check the first two elements of each pattern. /
240	unsigned int npatterns = m_encoding.npatterns ();
241	unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
242	unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, `2`);
243	for (unsigned int i = `0`; i < base_nelts; ++i)
244	if (!known_in_range_p (val: m_encoding [i], pos: start, size))
245	return false;
246
247	/ For stepped encodings, check the full range of the series. /
248	if (nelts_per_pattern == `3`)
249	{
250	element_type limit = input_nelts ();
251
252	/ The number of elements in each pattern beyond the first two*
253	that we checked above. /*
254	poly_int64 step_nelts = exact_div (a: m_encoding.full_nelts (),
255	b: npatterns) - `2`;
256	for (unsigned int i = `0`; i < npatterns; ++i)
257	{
258	/ BASE1 has been checked but BASE2 hasn't. /
259	element_type base1 = m_encoding [i + npatterns];
260	element_type base2 = m_encoding [i + base_nelts];
261
262	/ The step to add to get from BASE1 to each subsequent value. /
263	element_type step = clamp (elt: base2 - base1);
264
265	/ STEP has no inherent sign, so a value near LIMIT can*
266	act as a negative step. The series is in range if it
267	is in range according to one of the two interpretations.
268
269	Since we're dealing with clamped values, ELEMENT_TYPE is
270	wide enough for overflow not to be a problem. /*
271	element_type headroom_down = base1 - start;
272	element_type headroom_up = size - headroom_down - `1`;
273	HOST_WIDE_INT diff;
274	if ((!step.is_constant (const_value: &diff)
275	\|\| maybe_lt (a: headroom_up, b: diff * step_nelts))
276	&& (!(limit - step).is_constant (const_value: &diff)
277	\|\| maybe_lt (a: headroom_down, b: diff * step_nelts)))
278	return false;
279	}
280	}
281	return true;
282	}
283
284	/ Try to read the contents of VECTOR_CST CST as a constant permutation*
285	vector. Return true and add the elements to BUILDER on success,
286	otherwise return false without modifying BUILDER. /*
287
288	bool
289	tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
290	{
291	unsigned int encoded_nelts = vector_cst_encoded_nelts (t: cst);
292	for (unsigned int i = `0`; i < encoded_nelts; ++i)
293	if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
294	return false;
295
296	builder->new_vector (full_nelts: TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
297	VECTOR_CST_NPATTERNS (cst),
298	VECTOR_CST_NELTS_PER_PATTERN (cst));
299	for (unsigned int i = `0`; i < encoded_nelts; ++i)
300	builder->quick_push (obj: tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
301	return true;
302	}
303
304	/ Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. /
305
306	tree
307	vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
308	{
309	gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
310	tree_vector_builder sel (type, indices.encoding ().npatterns (),
311	indices.encoding ().nelts_per_pattern ());
312	unsigned int encoded_nelts = sel.encoded_nelts ();
313	for (unsigned int i = `0`; i < encoded_nelts; i++)
314	sel.quick_push (obj: build_int_cst (TREE_TYPE (type), indices [i]));
315	return sel.build ();
316	}
317
318	/ Return a CONST_VECTOR of mode MODE that contains the elements of*
319	INDICES. /*
320
321	rtx
322	vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
323	{
324	gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
325	&& known_eq (GET_MODE_NUNITS (mode), indices.length ()));
326	rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
327	indices.encoding ().nelts_per_pattern ());
328	unsigned int encoded_nelts = sel.encoded_nelts ();
329	for (unsigned int i = `0`; i < encoded_nelts; i++)
330	sel.quick_push (obj: gen_int_mode (indices [i], GET_MODE_INNER (mode)));
331	return sel.build ();
332	}
333
334	#if CHECKING_P
335
336	namespace selftest {
337
338	/ Test a 12-element vector. /
339
340	static void
341	test_vec_perm_12 (void)
342	{
343	vec_perm_builder builder (`12`, `12`, `1`);
344	for (unsigned int i = `0`; i < `4`; ++i)
345	{
346	builder.quick_push (obj: i * `5`);
347	builder.quick_push (obj: `3` + i);
348	builder.quick_push (obj: `2` + `3` * i);
349	}
350	vec_perm_indices indices (builder, `1`, `12`);
351	ASSERT_TRUE (indices.series_p (`0`, `3`, `0`, `5`));
352	ASSERT_FALSE (indices.series_p (`0`, `3`, `3`, `5`));
353	ASSERT_FALSE (indices.series_p (`0`, `3`, `0`, `8`));
354	ASSERT_TRUE (indices.series_p (`1`, `3`, `3`, `1`));
355	ASSERT_TRUE (indices.series_p (`2`, `3`, `2`, `3`));
356
357	ASSERT_TRUE (indices.series_p (`0`, `4`, `0`, `4`));
358	ASSERT_FALSE (indices.series_p (`1`, `4`, `3`, `4`));
359
360	ASSERT_TRUE (indices.series_p (`0`, `6`, `0`, `10`));
361	ASSERT_FALSE (indices.series_p (`0`, `6`, `0`, `100`));
362
363	ASSERT_FALSE (indices.series_p (`1`, `10`, `3`, `7`));
364	ASSERT_TRUE (indices.series_p (`1`, `10`, `3`, `8`));
365
366	ASSERT_TRUE (indices.series_p (`0`, `12`, `0`, `10`));
367	ASSERT_TRUE (indices.series_p (`0`, `12`, `0`, `11`));
368	ASSERT_TRUE (indices.series_p (`0`, `12`, `0`, `100`));
369	}
370
371	/ Run selftests for this file. /
372
373	void
374	vec_perm_indices_cc_tests ()
375	{
376	test_vec_perm_12 ();
377	}
378
379	} // namespace selftest
380
381	#endif
382

source code of gcc/vec-perm-indices.cc