1/* A representation of vector permutation indices.
2 Copyright (C) 2017-2023 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "vec-perm-indices.h"
24#include "tree.h"
25#include "fold-const.h"
26#include "tree-vector-builder.h"
27#include "backend.h"
28#include "rtl.h"
29#include "memmodel.h"
30#include "emit-rtl.h"
31#include "selftest.h"
32#include "rtx-vector-builder.h"
33
34/* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
37
38void
39vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
41 poly_uint64 nelts_per_input)
42{
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
49
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (const_value: &copy_nelts))
56 m_encoding.new_vector (full_nelts, npatterns: copy_nelts, nelts_per_pattern: 1);
57 else
58 {
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, npatterns: elements.npatterns (),
61 nelts_per_pattern: elements.nelts_per_pattern ());
62 }
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
65 m_encoding.quick_push (obj: clamp (elt: elements.elt (i)));
66 /* Use the fact that:
67
68 (a + b) % c == ((a % c) + (b % c)) % c
69
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
72 {
73 element_type step = clamp (elt: elements.elt (i)
74 - elements.elt (i: i - npatterns));
75 m_encoding.quick_push (obj: clamp (elt: m_encoding[i - npatterns] + step));
76 }
77 m_encoding.finalize ();
78}
79
80/* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
84
85void
86vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
88{
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (full_nelts: orig.m_encoding.full_nelts () * factor,
92 npatterns: orig.m_encoding.npatterns () * factor,
93 nelts_per_pattern: orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
96 {
97 element_type base = orig.m_encoding[i] * factor;
98 for (unsigned int j = 0; j < factor; ++j)
99 m_encoding.quick_push (obj: base + j);
100 }
101 m_encoding.finalize ();
102}
103
104/* Check whether we can switch to a new permutation vector that
105 selects the same input elements as ORIG, but with each element
106 built up from FACTOR pieces. Return true if yes, otherwise
107 return false. Every FACTOR permutation indexes should be
108 continuous separately and the first one of each batch should
109 be able to exactly modulo FACTOR. For example, if ORIG is
110 { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 is { 1, 2, 0, 3 }. */
112
113bool
114vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 unsigned int factor)
116{
117 gcc_assert (factor > 0);
118
119 if (maybe_lt (a: orig.m_nelts_per_input, b: factor))
120 return false;
121
122 poly_uint64 nelts;
123 /* Invalid if vector units number isn't multiple of factor. */
124 if (!multiple_p (a: orig.m_nelts_per_input, b: factor, multiple: &nelts))
125 return false;
126
127 /* Only handle the case that npatterns is multiple of factor.
128 FIXME: Try to see whether we can reshape it by factor npatterns. */
129 if (orig.m_encoding.npatterns () % factor != 0)
130 return false;
131
132 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 auto_vec<element_type, 32> encoding (encoded_nelts);
134 /* Separate all encoded elements into batches by size factor,
135 then ensure the first element of each batch is multiple of
136 factor and all elements in each batch is consecutive from
137 the first one. */
138 for (unsigned int i = 0; i < encoded_nelts; i += factor)
139 {
140 element_type first = orig.m_encoding[i];
141 element_type new_index;
142 if (!multiple_p (a: first, b: factor, multiple: &new_index))
143 return false;
144 for (unsigned int j = 1; j < factor; ++j)
145 if (maybe_ne (a: first + j, b: orig.m_encoding[i + j]))
146 return false;
147 encoding.quick_push (obj: new_index);
148 }
149
150 m_ninputs = orig.m_ninputs;
151 m_nelts_per_input = nelts;
152 poly_uint64 full_nelts = exact_div (a: orig.m_encoding.full_nelts (), b: factor);
153 unsigned int npatterns = orig.m_encoding.npatterns () / factor;
154
155 m_encoding.new_vector (full_nelts, npatterns,
156 nelts_per_pattern: orig.m_encoding.nelts_per_pattern ());
157 m_encoding.splice (src: encoding);
158 m_encoding.finalize ();
159
160 return true;
161}
162
163/* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 the values of the permutation vector but it doesn't change the way that
165 the elements are encoded. */
166
167void
168vec_perm_indices::rotate_inputs (int delta)
169{
170 element_type element_delta = delta * m_nelts_per_input;
171 for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 m_encoding[i] = clamp (elt: m_encoding[i] + element_delta);
173}
174
175/* Return true if index OUT_BASE + I * OUT_STEP selects input
176 element IN_BASE + I * IN_STEP. For example, the call to test
177 whether a permute reverses a vector of N elements would be:
178
179 series_p (0, 1, N - 1, -1)
180
181 which would return true for { N - 1, N - 2, N - 3, ... }.
182 The calls to test for an interleaving of elements starting
183 at N1 and N2 would be:
184
185 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
186
187 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
188
189bool
190vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 element_type in_base, element_type in_step) const
192{
193 /* Check the base value. */
194 if (maybe_ne (a: clamp (elt: m_encoding.elt (i: out_base)), b: clamp (elt: in_base)))
195 return false;
196
197 element_type full_nelts = m_encoding.full_nelts ();
198 unsigned int npatterns = m_encoding.npatterns ();
199
200 /* Calculate which multiple of OUT_STEP elements we need to get
201 back to the same pattern. */
202 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
203
204 /* Check the steps. */
205 in_step = clamp (elt: in_step);
206 out_base += out_step;
207 unsigned int limit = 0;
208 for (;;)
209 {
210 /* Succeed if we've checked all the elements in the vector. */
211 if (known_ge (out_base, full_nelts))
212 return true;
213
214 if (out_base >= npatterns)
215 {
216 /* We've got to the end of the "foreground" values. Check
217 2 elements from each pattern in the "background" values. */
218 if (limit == 0)
219 limit = out_base + cycle_length * 2;
220 else if (out_base >= limit)
221 return true;
222 }
223
224 element_type v0 = m_encoding.elt (i: out_base - out_step);
225 element_type v1 = m_encoding.elt (i: out_base);
226 if (maybe_ne (a: clamp (elt: v1 - v0), b: in_step))
227 return false;
228
229 out_base += out_step;
230 }
231}
232
233/* Return true if all elements of the permutation vector are in the range
234 [START, START + SIZE). */
235
236bool
237vec_perm_indices::all_in_range_p (element_type start, element_type size) const
238{
239 /* Check the first two elements of each pattern. */
240 unsigned int npatterns = m_encoding.npatterns ();
241 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
242 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
243 for (unsigned int i = 0; i < base_nelts; ++i)
244 if (!known_in_range_p (val: m_encoding[i], pos: start, size))
245 return false;
246
247 /* For stepped encodings, check the full range of the series. */
248 if (nelts_per_pattern == 3)
249 {
250 element_type limit = input_nelts ();
251
252 /* The number of elements in each pattern beyond the first two
253 that we checked above. */
254 poly_int64 step_nelts = exact_div (a: m_encoding.full_nelts (),
255 b: npatterns) - 2;
256 for (unsigned int i = 0; i < npatterns; ++i)
257 {
258 /* BASE1 has been checked but BASE2 hasn't. */
259 element_type base1 = m_encoding[i + npatterns];
260 element_type base2 = m_encoding[i + base_nelts];
261
262 /* The step to add to get from BASE1 to each subsequent value. */
263 element_type step = clamp (elt: base2 - base1);
264
265 /* STEP has no inherent sign, so a value near LIMIT can
266 act as a negative step. The series is in range if it
267 is in range according to one of the two interpretations.
268
269 Since we're dealing with clamped values, ELEMENT_TYPE is
270 wide enough for overflow not to be a problem. */
271 element_type headroom_down = base1 - start;
272 element_type headroom_up = size - headroom_down - 1;
273 HOST_WIDE_INT diff;
274 if ((!step.is_constant (const_value: &diff)
275 || maybe_lt (a: headroom_up, b: diff * step_nelts))
276 && (!(limit - step).is_constant (const_value: &diff)
277 || maybe_lt (a: headroom_down, b: diff * step_nelts)))
278 return false;
279 }
280 }
281 return true;
282}
283
284/* Try to read the contents of VECTOR_CST CST as a constant permutation
285 vector. Return true and add the elements to BUILDER on success,
286 otherwise return false without modifying BUILDER. */
287
288bool
289tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
290{
291 unsigned int encoded_nelts = vector_cst_encoded_nelts (t: cst);
292 for (unsigned int i = 0; i < encoded_nelts; ++i)
293 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
294 return false;
295
296 builder->new_vector (full_nelts: TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
297 VECTOR_CST_NPATTERNS (cst),
298 VECTOR_CST_NELTS_PER_PATTERN (cst));
299 for (unsigned int i = 0; i < encoded_nelts; ++i)
300 builder->quick_push (obj: tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
301 return true;
302}
303
304/* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
305
306tree
307vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
308{
309 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
310 tree_vector_builder sel (type, indices.encoding ().npatterns (),
311 indices.encoding ().nelts_per_pattern ());
312 unsigned int encoded_nelts = sel.encoded_nelts ();
313 for (unsigned int i = 0; i < encoded_nelts; i++)
314 sel.quick_push (obj: build_int_cst (TREE_TYPE (type), indices[i]));
315 return sel.build ();
316}
317
318/* Return a CONST_VECTOR of mode MODE that contains the elements of
319 INDICES. */
320
321rtx
322vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
323{
324 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
325 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
326 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
327 indices.encoding ().nelts_per_pattern ());
328 unsigned int encoded_nelts = sel.encoded_nelts ();
329 for (unsigned int i = 0; i < encoded_nelts; i++)
330 sel.quick_push (obj: gen_int_mode (indices[i], GET_MODE_INNER (mode)));
331 return sel.build ();
332}
333
334#if CHECKING_P
335
336namespace selftest {
337
338/* Test a 12-element vector. */
339
340static void
341test_vec_perm_12 (void)
342{
343 vec_perm_builder builder (12, 12, 1);
344 for (unsigned int i = 0; i < 4; ++i)
345 {
346 builder.quick_push (obj: i * 5);
347 builder.quick_push (obj: 3 + i);
348 builder.quick_push (obj: 2 + 3 * i);
349 }
350 vec_perm_indices indices (builder, 1, 12);
351 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
352 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
353 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
354 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
355 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
356
357 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
358 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
359
360 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
361 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
362
363 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
364 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
365
366 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
367 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
368 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
369}
370
371/* Run selftests for this file. */
372
373void
374vec_perm_indices_cc_tests ()
375{
376 test_vec_perm_12 ();
377}
378
379} // namespace selftest
380
381#endif
382

source code of gcc/vec-perm-indices.cc