1 | /* A representation of vector permutation indices. |
2 | Copyright (C) 2017-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "vec-perm-indices.h" |
24 | #include "tree.h" |
25 | #include "fold-const.h" |
26 | #include "tree-vector-builder.h" |
27 | #include "backend.h" |
28 | #include "rtl.h" |
29 | #include "memmodel.h" |
30 | #include "emit-rtl.h" |
31 | #include "selftest.h" |
32 | #include "rtx-vector-builder.h" |
33 | |
34 | /* Switch to a new permutation vector that selects between NINPUTS vector |
35 | inputs that have NELTS_PER_INPUT elements each. Take the elements of the |
36 | new permutation vector from ELEMENTS, clamping each one to be in range. */ |
37 | |
38 | void |
39 | vec_perm_indices::new_vector (const vec_perm_builder &elements, |
40 | unsigned int ninputs, |
41 | poly_uint64 nelts_per_input) |
42 | { |
43 | m_ninputs = ninputs; |
44 | m_nelts_per_input = nelts_per_input; |
45 | /* If the vector has a constant number of elements, expand the |
46 | encoding and clamp each element. E.g. { 0, 2, 4, ... } might |
47 | wrap halfway if there is only one vector input, and we want |
48 | the wrapped form to be the canonical one. |
49 | |
50 | If the vector has a variable number of elements, just copy |
51 | the encoding. In that case the unwrapped form is canonical |
52 | and there is no way of representing the wrapped form. */ |
53 | poly_uint64 full_nelts = elements.full_nelts (); |
54 | unsigned HOST_WIDE_INT copy_nelts; |
55 | if (full_nelts.is_constant (const_value: ©_nelts)) |
56 | m_encoding.new_vector (full_nelts, npatterns: copy_nelts, nelts_per_pattern: 1); |
57 | else |
58 | { |
59 | copy_nelts = elements.encoded_nelts (); |
60 | m_encoding.new_vector (full_nelts, npatterns: elements.npatterns (), |
61 | nelts_per_pattern: elements.nelts_per_pattern ()); |
62 | } |
63 | unsigned int npatterns = m_encoding.npatterns (); |
64 | for (unsigned int i = 0; i < npatterns; ++i) |
65 | m_encoding.quick_push (obj: clamp (elt: elements.elt (i))); |
66 | /* Use the fact that: |
67 | |
68 | (a + b) % c == ((a % c) + (b % c)) % c |
69 | |
70 | to simplify the clamping of variable-length vectors. */ |
71 | for (unsigned int i = npatterns; i < copy_nelts; ++i) |
72 | { |
73 | element_type step = clamp (elt: elements.elt (i) |
74 | - elements.elt (i: i - npatterns)); |
75 | m_encoding.quick_push (obj: clamp (elt: m_encoding[i - npatterns] + step)); |
76 | } |
77 | m_encoding.finalize (); |
78 | } |
79 | |
80 | /* Switch to a new permutation vector that selects the same input elements |
81 | as ORIG, but with each element split into FACTOR pieces. For example, |
82 | if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is |
83 | { 2, 3, 4, 5, 0, 1, 6, 7 }. */ |
84 | |
85 | void |
86 | vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig, |
87 | unsigned int factor) |
88 | { |
89 | m_ninputs = orig.m_ninputs; |
90 | m_nelts_per_input = orig.m_nelts_per_input * factor; |
91 | m_encoding.new_vector (full_nelts: orig.m_encoding.full_nelts () * factor, |
92 | npatterns: orig.m_encoding.npatterns () * factor, |
93 | nelts_per_pattern: orig.m_encoding.nelts_per_pattern ()); |
94 | unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); |
95 | for (unsigned int i = 0; i < encoded_nelts; ++i) |
96 | { |
97 | element_type base = orig.m_encoding[i] * factor; |
98 | for (unsigned int j = 0; j < factor; ++j) |
99 | m_encoding.quick_push (obj: base + j); |
100 | } |
101 | m_encoding.finalize (); |
102 | } |
103 | |
104 | /* Check whether we can switch to a new permutation vector that |
105 | selects the same input elements as ORIG, but with each element |
106 | built up from FACTOR pieces. Return true if yes, otherwise |
107 | return false. Every FACTOR permutation indexes should be |
108 | continuous separately and the first one of each batch should |
109 | be able to exactly modulo FACTOR. For example, if ORIG is |
110 | { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation |
111 | is { 1, 2, 0, 3 }. */ |
112 | |
113 | bool |
114 | vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig, |
115 | unsigned int factor) |
116 | { |
117 | gcc_assert (factor > 0); |
118 | |
119 | if (maybe_lt (a: orig.m_nelts_per_input, b: factor)) |
120 | return false; |
121 | |
122 | poly_uint64 nelts; |
123 | /* Invalid if vector units number isn't multiple of factor. */ |
124 | if (!multiple_p (a: orig.m_nelts_per_input, b: factor, multiple: &nelts)) |
125 | return false; |
126 | |
127 | /* Only handle the case that npatterns is multiple of factor. |
128 | FIXME: Try to see whether we can reshape it by factor npatterns. */ |
129 | if (orig.m_encoding.npatterns () % factor != 0) |
130 | return false; |
131 | |
132 | unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); |
133 | auto_vec<element_type, 32> encoding (encoded_nelts); |
134 | /* Separate all encoded elements into batches by size factor, |
135 | then ensure the first element of each batch is multiple of |
136 | factor and all elements in each batch is consecutive from |
137 | the first one. */ |
138 | for (unsigned int i = 0; i < encoded_nelts; i += factor) |
139 | { |
140 | element_type first = orig.m_encoding[i]; |
141 | element_type new_index; |
142 | if (!multiple_p (a: first, b: factor, multiple: &new_index)) |
143 | return false; |
144 | for (unsigned int j = 1; j < factor; ++j) |
145 | if (maybe_ne (a: first + j, b: orig.m_encoding[i + j])) |
146 | return false; |
147 | encoding.quick_push (obj: new_index); |
148 | } |
149 | |
150 | m_ninputs = orig.m_ninputs; |
151 | m_nelts_per_input = nelts; |
152 | poly_uint64 full_nelts = exact_div (a: orig.m_encoding.full_nelts (), b: factor); |
153 | unsigned int npatterns = orig.m_encoding.npatterns () / factor; |
154 | |
155 | m_encoding.new_vector (full_nelts, npatterns, |
156 | nelts_per_pattern: orig.m_encoding.nelts_per_pattern ()); |
157 | m_encoding.splice (src: encoding); |
158 | m_encoding.finalize (); |
159 | |
160 | return true; |
161 | } |
162 | |
163 | /* Rotate the inputs of the permutation right by DELTA inputs. This changes |
164 | the values of the permutation vector but it doesn't change the way that |
165 | the elements are encoded. */ |
166 | |
167 | void |
168 | vec_perm_indices::rotate_inputs (int delta) |
169 | { |
170 | element_type element_delta = delta * m_nelts_per_input; |
171 | for (unsigned int i = 0; i < m_encoding.length (); ++i) |
172 | m_encoding[i] = clamp (elt: m_encoding[i] + element_delta); |
173 | } |
174 | |
175 | /* Return true if index OUT_BASE + I * OUT_STEP selects input |
176 | element IN_BASE + I * IN_STEP. For example, the call to test |
177 | whether a permute reverses a vector of N elements would be: |
178 | |
179 | series_p (0, 1, N - 1, -1) |
180 | |
181 | which would return true for { N - 1, N - 2, N - 3, ... }. |
182 | The calls to test for an interleaving of elements starting |
183 | at N1 and N2 would be: |
184 | |
185 | series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1). |
186 | |
187 | which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */ |
188 | |
189 | bool |
190 | vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, |
191 | element_type in_base, element_type in_step) const |
192 | { |
193 | /* Check the base value. */ |
194 | if (maybe_ne (a: clamp (elt: m_encoding.elt (i: out_base)), b: clamp (elt: in_base))) |
195 | return false; |
196 | |
197 | element_type full_nelts = m_encoding.full_nelts (); |
198 | unsigned int npatterns = m_encoding.npatterns (); |
199 | |
200 | /* Calculate which multiple of OUT_STEP elements we need to get |
201 | back to the same pattern. */ |
202 | unsigned int cycle_length = least_common_multiple (out_step, npatterns); |
203 | |
204 | /* Check the steps. */ |
205 | in_step = clamp (elt: in_step); |
206 | out_base += out_step; |
207 | unsigned int limit = 0; |
208 | for (;;) |
209 | { |
210 | /* Succeed if we've checked all the elements in the vector. */ |
211 | if (known_ge (out_base, full_nelts)) |
212 | return true; |
213 | |
214 | if (out_base >= npatterns) |
215 | { |
216 | /* We've got to the end of the "foreground" values. Check |
217 | 2 elements from each pattern in the "background" values. */ |
218 | if (limit == 0) |
219 | limit = out_base + cycle_length * 2; |
220 | else if (out_base >= limit) |
221 | return true; |
222 | } |
223 | |
224 | element_type v0 = m_encoding.elt (i: out_base - out_step); |
225 | element_type v1 = m_encoding.elt (i: out_base); |
226 | if (maybe_ne (a: clamp (elt: v1 - v0), b: in_step)) |
227 | return false; |
228 | |
229 | out_base += out_step; |
230 | } |
231 | } |
232 | |
233 | /* Return true if all elements of the permutation vector are in the range |
234 | [START, START + SIZE). */ |
235 | |
236 | bool |
237 | vec_perm_indices::all_in_range_p (element_type start, element_type size) const |
238 | { |
239 | /* Check the first two elements of each pattern. */ |
240 | unsigned int npatterns = m_encoding.npatterns (); |
241 | unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern (); |
242 | unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2); |
243 | for (unsigned int i = 0; i < base_nelts; ++i) |
244 | if (!known_in_range_p (val: m_encoding[i], pos: start, size)) |
245 | return false; |
246 | |
247 | /* For stepped encodings, check the full range of the series. */ |
248 | if (nelts_per_pattern == 3) |
249 | { |
250 | element_type limit = input_nelts (); |
251 | |
252 | /* The number of elements in each pattern beyond the first two |
253 | that we checked above. */ |
254 | poly_int64 step_nelts = exact_div (a: m_encoding.full_nelts (), |
255 | b: npatterns) - 2; |
256 | for (unsigned int i = 0; i < npatterns; ++i) |
257 | { |
258 | /* BASE1 has been checked but BASE2 hasn't. */ |
259 | element_type base1 = m_encoding[i + npatterns]; |
260 | element_type base2 = m_encoding[i + base_nelts]; |
261 | |
262 | /* The step to add to get from BASE1 to each subsequent value. */ |
263 | element_type step = clamp (elt: base2 - base1); |
264 | |
265 | /* STEP has no inherent sign, so a value near LIMIT can |
266 | act as a negative step. The series is in range if it |
267 | is in range according to one of the two interpretations. |
268 | |
269 | Since we're dealing with clamped values, ELEMENT_TYPE is |
270 | wide enough for overflow not to be a problem. */ |
271 | element_type headroom_down = base1 - start; |
272 | element_type headroom_up = size - headroom_down - 1; |
273 | HOST_WIDE_INT diff; |
274 | if ((!step.is_constant (const_value: &diff) |
275 | || maybe_lt (a: headroom_up, b: diff * step_nelts)) |
276 | && (!(limit - step).is_constant (const_value: &diff) |
277 | || maybe_lt (a: headroom_down, b: diff * step_nelts))) |
278 | return false; |
279 | } |
280 | } |
281 | return true; |
282 | } |
283 | |
284 | /* Try to read the contents of VECTOR_CST CST as a constant permutation |
285 | vector. Return true and add the elements to BUILDER on success, |
286 | otherwise return false without modifying BUILDER. */ |
287 | |
288 | bool |
289 | tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst) |
290 | { |
291 | unsigned int encoded_nelts = vector_cst_encoded_nelts (t: cst); |
292 | for (unsigned int i = 0; i < encoded_nelts; ++i) |
293 | if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i))) |
294 | return false; |
295 | |
296 | builder->new_vector (full_nelts: TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)), |
297 | VECTOR_CST_NPATTERNS (cst), |
298 | VECTOR_CST_NELTS_PER_PATTERN (cst)); |
299 | for (unsigned int i = 0; i < encoded_nelts; ++i) |
300 | builder->quick_push (obj: tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i))); |
301 | return true; |
302 | } |
303 | |
304 | /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */ |
305 | |
306 | tree |
307 | vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices) |
308 | { |
309 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ())); |
310 | tree_vector_builder sel (type, indices.encoding ().npatterns (), |
311 | indices.encoding ().nelts_per_pattern ()); |
312 | unsigned int encoded_nelts = sel.encoded_nelts (); |
313 | for (unsigned int i = 0; i < encoded_nelts; i++) |
314 | sel.quick_push (obj: build_int_cst (TREE_TYPE (type), indices[i])); |
315 | return sel.build (); |
316 | } |
317 | |
318 | /* Return a CONST_VECTOR of mode MODE that contains the elements of |
319 | INDICES. */ |
320 | |
321 | rtx |
322 | vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) |
323 | { |
324 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
325 | && known_eq (GET_MODE_NUNITS (mode), indices.length ())); |
326 | rtx_vector_builder sel (mode, indices.encoding ().npatterns (), |
327 | indices.encoding ().nelts_per_pattern ()); |
328 | unsigned int encoded_nelts = sel.encoded_nelts (); |
329 | for (unsigned int i = 0; i < encoded_nelts; i++) |
330 | sel.quick_push (obj: gen_int_mode (indices[i], GET_MODE_INNER (mode))); |
331 | return sel.build (); |
332 | } |
333 | |
334 | #if CHECKING_P |
335 | |
336 | namespace selftest { |
337 | |
338 | /* Test a 12-element vector. */ |
339 | |
340 | static void |
341 | test_vec_perm_12 (void) |
342 | { |
343 | vec_perm_builder builder (12, 12, 1); |
344 | for (unsigned int i = 0; i < 4; ++i) |
345 | { |
346 | builder.quick_push (obj: i * 5); |
347 | builder.quick_push (obj: 3 + i); |
348 | builder.quick_push (obj: 2 + 3 * i); |
349 | } |
350 | vec_perm_indices indices (builder, 1, 12); |
351 | ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); |
352 | ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); |
353 | ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); |
354 | ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); |
355 | ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); |
356 | |
357 | ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); |
358 | ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); |
359 | |
360 | ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); |
361 | ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); |
362 | |
363 | ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); |
364 | ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); |
365 | |
366 | ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); |
367 | ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); |
368 | ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); |
369 | } |
370 | |
371 | /* Run selftests for this file. */ |
372 | |
373 | void |
374 | vec_perm_indices_cc_tests () |
375 | { |
376 | test_vec_perm_12 (); |
377 | } |
378 | |
379 | } // namespace selftest |
380 | |
381 | #endif |
382 | |