1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. |
3 | Copyright (C) 1987-2023 Free Software Foundation, Inc. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* Work around tree-optimization/91825. */ |
22 | #pragma GCC diagnostic warning "-Wmaybe-uninitialized" |
23 | |
24 | #include "config.h" |
25 | #include "system.h" |
26 | #include "coretypes.h" |
27 | #include "backend.h" |
28 | #include "target.h" |
29 | #include "rtl.h" |
30 | #include "tree.h" |
31 | #include "predict.h" |
32 | #include "memmodel.h" |
33 | #include "tm_p.h" |
34 | #include "optabs.h" |
35 | #include "expmed.h" |
36 | #include "regs.h" |
37 | #include "emit-rtl.h" |
38 | #include "diagnostic-core.h" |
39 | #include "fold-const.h" |
40 | #include "stor-layout.h" |
41 | #include "dojump.h" |
42 | #include "explow.h" |
43 | #include "expr.h" |
44 | #include "langhooks.h" |
45 | #include "tree-vector-builder.h" |
46 | |
/* The default cost tables filled in by init_expmed.  When the compiler
   supports switchable targets, this_target_expmed points at the tables
   for the currently selected target.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
51 | |
/* Forward declarations of file-local subroutines; the definitions
   appear later in this file.  */
static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT,
				      poly_uint64, poly_uint64,
				      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
				     unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
				       unsigned HOST_WIDE_INT,
				       unsigned HOST_WIDE_INT, int, rtx,
				       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 | |
89 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |
90 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. |
91 | The mask is truncated if necessary to the width of mode MODE. The |
92 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ |
93 | |
94 | static inline rtx |
95 | mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement) |
96 | { |
97 | return immed_wide_int_const |
98 | (wi::shifted_mask (start: bitpos, width: bitsize, negate_p: complement, |
99 | precision: GET_MODE_PRECISION (mode)), mode); |
100 | } |
101 | |
/* Test whether a value is zero or a power of two.  (Relies on the
   classic x & (x - 1) trick: the result is zero iff X has at most one
   bit set.)  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
105 | |
/* Scratch RTL expressions used by init_expmed to query operation costs.
   init_expmed_one_mode rewrites the mode of each node in place with
   PUT_MODE and feeds the result to set_src_cost, so one set of nodes
   serves every mode.  */
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  /* pow2[m] holds the constant 1 << m and cint[m] the constant m;
     they are patched into the shift patterns above as multipliers and
     shift counts respectively.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
130 | |
/* Record the cost of converting FROM_MODE to TO_MODE, for size or speed
   tuning as selected by SPEED, using the scratch expressions in ALL.
   Truncation cost is used when narrowing, zero-extension cost when
   widening (sign-extension is assumed to cost the same).  */
static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
		      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (mode: to_mode);
  from_size = GET_MODE_PRECISION (mode: from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (x: to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (x: from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (x: all->reg, mode: from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    cost: set_src_cost (x: which, mode: to_mode, speed_p: speed));
  /* Restore all->reg's mode.  */
  PUT_MODE (x: all->reg, mode: to_mode);
}
161 | |
/* Fill in the cost tables for MODE, for size or speed tuning as
   selected by SPEED, by retargeting the scratch expressions in ALL to
   MODE and querying set_src_cost on each.  For scalar integer modes
   this also records conversion costs to MODE and, when a wider integer
   mode exists, widening-multiply and multiply-highpart costs.  */
static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Retarget every scratch pattern to MODE in place.  */
  PUT_MODE (x: all->reg, mode);
  PUT_MODE (x: all->plus, mode);
  PUT_MODE (x: all->neg, mode);
  PUT_MODE (x: all->mult, mode);
  PUT_MODE (x: all->sdiv, mode);
  PUT_MODE (x: all->udiv, mode);
  PUT_MODE (x: all->sdiv_32, mode);
  PUT_MODE (x: all->smod_32, mode);
  PUT_MODE (x: all->wide_trunc, mode);
  PUT_MODE (x: all->shift, mode);
  PUT_MODE (x: all->shift_mult, mode);
  PUT_MODE (x: all->shift_add, mode);
  PUT_MODE (x: all->shift_sub0, mode);
  PUT_MODE (x: all->shift_sub1, mode);
  PUT_MODE (x: all->zext, mode);
  PUT_MODE (x: all->trunc, mode);

  set_add_cost (speed, mode, cost: set_src_cost (x: all->plus, mode, speed_p: speed));
  set_neg_cost (speed, mode, cost: set_src_cost (x: all->neg, mode, speed_p: speed));
  set_mul_cost (speed, mode, cost: set_src_cost (x: all->mult, mode, speed_p: speed));
  set_sdiv_cost (speed, mode, cost: set_src_cost (x: all->sdiv, mode, speed_p: speed));
  set_udiv_cost (speed, mode, cost: set_src_cost (x: all->udiv, mode, speed_p: speed));

  /* Division/modulus by a power of two (32 here) is "cheap" when it
     costs no more than a small multiple of an addition.  */
  set_sdiv_pow2_cheap (speed, mode, cheap_p: (set_src_cost (x: all->sdiv_32, mode, speed_p: speed)
				       <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, cheap: (set_src_cost (x: all->smod_32, mode, speed_p: speed)
				      <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; shift-and-add by zero degenerates to a
     plain addition.  */
  set_shift_cost (speed, mode, bits: 0, cost: 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, bits: 0, cost);
    set_shiftsub0_cost (speed, mode, bits: 0, cost);
    set_shiftsub1_cost (speed, mode, bits: 0, cost);
  }

  /* Cost of shifting by each amount M, and of the combined
     shift-add/shift-subtract forms (via multiplication by 1 << M).  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, bits: m, cost: set_src_cost (x: all->shift, mode, speed_p: speed));
      set_shiftadd_cost (speed, mode, bits: m, cost: set_src_cost (x: all->shift_add, mode,
						     speed_p: speed));
      set_shiftsub0_cost (speed, mode, bits: m, cost: set_src_cost (x: all->shift_sub0, mode,
						      speed_p: speed));
      set_shiftsub1_cost (speed, mode, bits: m, cost: set_src_cost (x: all->shift_sub1, mode,
						      speed_p: speed));
    }

  /* Scalar integer modes additionally get conversion and widening
     costs.  */
  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (m: mode, result: &int_mode_to))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, to_mode: int_mode_to,
			      from_mode: as_a <scalar_int_mode> (m: mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	  && GET_MODE_WIDER_MODE (m: int_mode_to).exists (mode: &wider_mode))
	{
	  PUT_MODE (x: all->reg, mode);
	  PUT_MODE (x: all->zext, mode: wider_mode);
	  PUT_MODE (x: all->wide_mult, mode: wider_mode);
	  PUT_MODE (x: all->wide_lshr, mode: wider_mode);
	  XEXP (all->wide_lshr, 1)
	    = gen_int_shift_amount (wider_mode, mode_bitsize);

	  set_mul_widen_cost (speed, mode: wider_mode,
			      cost: set_src_cost (x: all->wide_mult, mode: wider_mode, speed_p: speed));
	  set_mul_highpart_cost (speed, mode: int_mode_to,
				 cost: set_src_cost (x: all->wide_trunc,
						mode: int_mode_to, speed_p: speed));
	}
    }
}
249 | |
/* Initialize all the expmed cost tables for the current target: build
   a set of scratch RTL patterns, retarget them to every integer,
   partial-integer and integer-vector mode, and record the resulting
   rtx costs for both the size-tuned and speed-tuned variants.  */
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (s: &all, c: 0, n: sizeof all);
  /* Pre-build the shift-count and power-of-two constants used by
     init_expmed_one_mode.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  /* SPEED == 0 records size-tuned costs, SPEED == 1 speed-tuned ones;
     crtl->maybe_hot_insn_p steers the target cost hooks accordingly.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, cost: set_src_cost (const0_rtx, mode, speed_p: speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (all: &all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (all: &all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (all: &all, mode, speed);
    }

  /* Reset the multiplication-algorithm hash table, or mark it live on
     the first call.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (idx: 0);
      memset (s: p, c: 0, n: sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* The scratch expressions are no longer needed; release them in
     reverse order of creation.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
332 | |
333 | /* Return an rtx representing minus the value of X. |
334 | MODE is the intended mode of the result, |
335 | useful if X is a CONST_INT. */ |
336 | |
337 | rtx |
338 | negate_rtx (machine_mode mode, rtx x) |
339 | { |
340 | rtx result = simplify_unary_operation (code: NEG, mode, op: x, op_mode: mode); |
341 | |
342 | if (result == 0) |
343 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
344 | |
345 | return result; |
346 | } |
347 | |
348 | /* Whether reverse storage order is supported on the target. */ |
349 | static int reverse_storage_order_supported = -1; |
350 | |
351 | /* Check whether reverse storage order is supported on the target. */ |
352 | |
353 | static void |
354 | check_reverse_storage_order_support (void) |
355 | { |
356 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) |
357 | { |
358 | reverse_storage_order_supported = 0; |
359 | sorry ("reverse scalar storage order" ); |
360 | } |
361 | else |
362 | reverse_storage_order_supported = 1; |
363 | } |
364 | |
365 | /* Whether reverse FP storage order is supported on the target. */ |
366 | static int reverse_float_storage_order_supported = -1; |
367 | |
368 | /* Check whether reverse FP storage order is supported on the target. */ |
369 | |
370 | static void |
371 | check_reverse_float_storage_order_support (void) |
372 | { |
373 | if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) |
374 | { |
375 | reverse_float_storage_order_supported = 0; |
376 | sorry ("reverse floating-point scalar storage order" ); |
377 | } |
378 | else |
379 | reverse_float_storage_order_supported = 1; |
380 | } |
381 | |
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  /* A single byte has no storage order to flip.  */
  if (mode == QImode)
    return x;

  /* For complex values, flip the real and imaginary parts
     independently.  */
  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), x: real);
      imag = flip_storage_order (GET_MODE_INNER (mode), x: imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (UNLIKELY (reverse_storage_order_supported < 0))
    check_reverse_storage_order_support ();

  /* Non-integer modes are punned to an integer mode of the same
     precision so that BSWAP can be applied.  */
  if (!is_a <scalar_int_mode> (m: mode, result: &int_mode))
    {
      if (FLOAT_MODE_P (mode)
	  && UNLIKELY (reverse_float_storage_order_supported < 0))
	check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (size: GET_MODE_PRECISION (mode), limit: 0).exists (mode: &int_mode)
	  || !targetm.scalar_mode_supported_p (int_mode))
	{
	  sorry ("reverse storage order for %smode" , GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  /* Byte-swap, folding at compile time when possible.  */
  result = simplify_unary_operation (code: BSWAP, mode: int_mode, op: x, op_mode: int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  /* Undo the mode pun if one was applied above.  */
  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
433 | |
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (mode: &imode))
    {
      /* Step MEM forward by whole IMODE units; the field then starts
	 at BITNUM modulo the unit size.  */
      unsigned int unit = GET_MODE_BITSIZE (mode: imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      /* BLKmode case: cover exactly the bytes the field touches,
	 rounding the size up to a whole number of bytes.  */
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}
463 | |
464 | /* The caller wants to perform insertion or extraction PATTERN on a |
465 | bitfield of size BITSIZE at BITNUM bits into memory operand OP0. |
466 | BITREGION_START and BITREGION_END are as for store_bit_field |
467 | and FIELDMODE is the natural mode of the field. |
468 | |
469 | Search for a mode that is compatible with the memory access |
470 | restrictions and (where applicable) with a register insertion or |
471 | extraction. Return the new memory on success, storing the adjusted |
472 | bit position in *NEW_BITNUM. Return null otherwise. */ |
473 | |
474 | static rtx |
475 | (enum extraction_pattern pattern, |
476 | rtx op0, HOST_WIDE_INT bitsize, |
477 | HOST_WIDE_INT bitnum, |
478 | poly_uint64 bitregion_start, |
479 | poly_uint64 bitregion_end, |
480 | machine_mode fieldmode, |
481 | unsigned HOST_WIDE_INT *new_bitnum) |
482 | { |
483 | bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, |
484 | bitregion_end, MEM_ALIGN (op0), |
485 | MEM_VOLATILE_P (op0)); |
486 | scalar_int_mode best_mode; |
487 | if (iter.next_mode (&best_mode)) |
488 | { |
489 | /* We can use a memory in BEST_MODE. See whether this is true for |
490 | any wider modes. All other things being equal, we prefer to |
491 | use the widest mode possible because it tends to expose more |
492 | CSE opportunities. */ |
493 | if (!iter.prefer_smaller_modes ()) |
494 | { |
495 | /* Limit the search to the mode required by the corresponding |
496 | register insertion or extraction instruction, if any. */ |
497 | scalar_int_mode limit_mode = word_mode; |
498 | extraction_insn insn; |
499 | if (get_best_reg_extraction_insn (&insn, pattern, |
500 | GET_MODE_BITSIZE (mode: best_mode), |
501 | fieldmode)) |
502 | limit_mode = insn.field_mode; |
503 | |
504 | scalar_int_mode wider_mode; |
505 | while (iter.next_mode (&wider_mode) |
506 | && GET_MODE_SIZE (mode: wider_mode) <= GET_MODE_SIZE (mode: limit_mode)) |
507 | best_mode = wider_mode; |
508 | } |
509 | return narrow_bit_field_mem (mem: op0, mode: best_mode, bitsize, bitnum, |
510 | new_bitnum); |
511 | } |
512 | return NULL_RTX; |
513 | } |
514 | |
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
		     machine_mode struct_mode)
{
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    /* On big-endian targets the lowpart is at the high end: the field
       must be byte-aligned and either end exactly at the end of the
       structure or at a natural register boundary.  */
    return (multiple_p (a: bitnum, BITS_PER_UNIT)
	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
		|| multiple_p (a: bitnum + bitsize,
			       b: regsize * BITS_PER_UNIT)));
  else
    /* On little-endian targets the lowpart simply starts at a natural
       register boundary.  */
    return multiple_p (a: bitnum, b: regsize * BITS_PER_UNIT);
}
532 | |
533 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |
534 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |
535 | Return false if the access would touch memory outside the range |
536 | BITREGION_START to BITREGION_END for conformance to the C++ memory |
537 | model. */ |
538 | |
static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    scalar_int_mode fieldmode,
			    poly_uint64 bitregion_start,
			    poly_uint64 bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (mode: fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees, that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies: the
     MODESIZE-aligned access starting at bitnum - bitnum % modesize
     must lie entirely within [bitregion_start, bitregion_end].  */
  if (maybe_ne (a: bitregion_end, b: 0U)
      && (maybe_lt (a: bitnum - bitnum % modesize, b: bitregion_start)
	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
		       bitregion_end)))
    return false;

  return true;
}
579 | |
580 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |
581 | bit number BITNUM can be treated as a simple value of mode MODE. |
582 | Store the byte offset in *BYTENUM if so. */ |
583 | |
static bool
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
		       machine_mode mode, poly_uint64 *bytenum)
{
  /* The field must be byte-aligned, exactly fill MODE, and either the
     target tolerates unaligned accesses in MODE or the access is
     naturally aligned.  */
  return (MEM_P (op0)
	  && multiple_p (a: bitnum, BITS_PER_UNIT, multiple: bytenum)
	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
	      || (multiple_p (a: bitnum, GET_MODE_ALIGNMENT (mode))
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
595 | |
596 | /* Try to use instruction INSV to store VALUE into a field of OP0. |
597 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a |
598 | BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM |
599 | are as for store_bit_field. */ |
600 | |
601 | static bool |
602 | (const extraction_insn *insv, rtx op0, |
603 | opt_scalar_int_mode op0_mode, |
604 | unsigned HOST_WIDE_INT bitsize, |
605 | unsigned HOST_WIDE_INT bitnum, |
606 | rtx value, scalar_int_mode value_mode) |
607 | { |
608 | class expand_operand ops[4]; |
609 | rtx value1; |
610 | rtx xop0 = op0; |
611 | rtx_insn *last = get_last_insn (); |
612 | bool copy_back = false; |
613 | |
614 | scalar_int_mode op_mode = insv->field_mode; |
615 | unsigned int unit = GET_MODE_BITSIZE (mode: op_mode); |
616 | if (bitsize == 0 || bitsize > unit) |
617 | return false; |
618 | |
619 | if (MEM_P (xop0)) |
620 | /* Get a reference to the first byte of the field. */ |
621 | xop0 = narrow_bit_field_mem (mem: xop0, mode: insv->struct_mode, bitsize, bitnum, |
622 | new_bitnum: &bitnum); |
623 | else |
624 | { |
625 | /* Convert from counting within OP0 to counting in OP_MODE. */ |
626 | if (BYTES_BIG_ENDIAN) |
627 | bitnum += unit - GET_MODE_BITSIZE (mode: op0_mode.require ()); |
628 | |
629 | /* If xop0 is a register, we need it in OP_MODE |
630 | to make it acceptable to the format of insv. */ |
631 | if (GET_CODE (xop0) == SUBREG) |
632 | { |
633 | /* If such a SUBREG can't be created, give up. */ |
634 | if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)), |
635 | SUBREG_REG (xop0), SUBREG_BYTE (xop0))) |
636 | return false; |
637 | /* We can't just change the mode, because this might clobber op0, |
638 | and we will need the original value of op0 if insv fails. */ |
639 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), |
640 | SUBREG_BYTE (xop0)); |
641 | } |
642 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) |
643 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); |
644 | } |
645 | |
646 | /* If the destination is a paradoxical subreg such that we need a |
647 | truncate to the inner mode, perform the insertion on a temporary and |
648 | truncate the result to the original destination. Note that we can't |
649 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N |
650 | X) 0)) is (reg:N X). */ |
651 | if (GET_CODE (xop0) == SUBREG |
652 | && REG_P (SUBREG_REG (xop0)) |
653 | && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), |
654 | op_mode)) |
655 | { |
656 | rtx tem = gen_reg_rtx (op_mode); |
657 | emit_move_insn (tem, xop0); |
658 | xop0 = tem; |
659 | copy_back = true; |
660 | } |
661 | |
662 | /* There are similar overflow check at the start of store_bit_field_1, |
663 | but that only check the situation where the field lies completely |
664 | outside the register, while there do have situation where the field |
665 | lies partialy in the register, we need to adjust bitsize for this |
666 | partial overflow situation. Without this fix, pr48335-2.c on big-endian |
667 | will broken on those arch support bit insert instruction, like arm, aarch64 |
668 | etc. */ |
669 | if (bitsize + bitnum > unit && bitnum < unit) |
670 | { |
671 | warning (OPT_Wextra, "write of %wu-bit data outside the bound of " |
672 | "destination object, data truncated into %wu-bit" , |
673 | bitsize, unit - bitnum); |
674 | bitsize = unit - bitnum; |
675 | } |
676 | |
677 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
678 | "backwards" from the size of the unit we are inserting into. |
679 | Otherwise, we count bits from the most significant on a |
680 | BYTES/BITS_BIG_ENDIAN machine. */ |
681 | |
682 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
683 | bitnum = unit - bitsize - bitnum; |
684 | |
685 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |
686 | value1 = value; |
687 | if (value_mode != op_mode) |
688 | { |
689 | if (GET_MODE_BITSIZE (mode: value_mode) >= bitsize) |
690 | { |
691 | rtx tmp; |
692 | /* Optimization: Don't bother really extending VALUE |
693 | if it has all the bits we will actually use. However, |
694 | if we must narrow it, be sure we do it correctly. */ |
695 | |
696 | if (GET_MODE_SIZE (mode: value_mode) < GET_MODE_SIZE (mode: op_mode)) |
697 | { |
698 | tmp = simplify_subreg (outermode: op_mode, op: value1, innermode: value_mode, byte: 0); |
699 | if (! tmp) |
700 | tmp = simplify_gen_subreg (outermode: op_mode, |
701 | op: force_reg (value_mode, value1), |
702 | innermode: value_mode, byte: 0); |
703 | } |
704 | else |
705 | { |
706 | tmp = gen_lowpart_if_possible (op_mode, value1); |
707 | if (! tmp) |
708 | tmp = gen_lowpart (op_mode, force_reg (value_mode, value1)); |
709 | } |
710 | value1 = tmp; |
711 | } |
712 | else if (CONST_INT_P (value)) |
713 | value1 = gen_int_mode (INTVAL (value), op_mode); |
714 | else |
715 | /* Parse phase is supposed to make VALUE's data type |
716 | match that of the component reference, which is a type |
717 | at least as wide as the field; so VALUE should have |
718 | a mode that corresponds to that type. */ |
719 | gcc_assert (CONSTANT_P (value)); |
720 | } |
721 | |
722 | create_fixed_operand (op: &ops[0], x: xop0); |
723 | create_integer_operand (&ops[1], bitsize); |
724 | create_integer_operand (&ops[2], bitnum); |
725 | create_input_operand (op: &ops[3], value: value1, mode: op_mode); |
726 | if (maybe_expand_insn (icode: insv->icode, nops: 4, ops)) |
727 | { |
728 | if (copy_back) |
729 | convert_move (op0, xop0, true); |
730 | return true; |
731 | } |
732 | delete_insns_since (last); |
733 | return false; |
734 | } |
735 | |
736 | /* A subroutine of store_bit_field, with the same arguments. Return true |
737 | if the operation could be implemented. |
738 | |
739 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
740 | no other way of implementing the operation. If FALLBACK_P is false, |
741 | return false instead. |
742 | |
743 | if UNDEFINED_P is true then STR_RTX is undefined and may be set using |
744 | a subreg instead. */ |
745 | |
746 | static bool |
747 | store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
748 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
749 | machine_mode fieldmode, |
750 | rtx value, bool reverse, bool fallback_p, bool undefined_p) |
751 | { |
752 | rtx op0 = str_rtx; |
753 | |
754 | while (GET_CODE (op0) == SUBREG) |
755 | { |
756 | bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT; |
757 | op0 = SUBREG_REG (op0); |
758 | } |
759 | |
760 | /* No action is needed if the target is a register and if the field |
761 | lies completely outside that register. This can occur if the source |
762 | code contains an out-of-bounds access to a small array. */ |
763 | if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0)))) |
764 | return true; |
765 | |
766 | /* Use vec_set patterns for inserting parts of vectors whenever |
767 | available. */ |
768 | machine_mode outermode = GET_MODE (op0); |
769 | scalar_mode innermode = GET_MODE_INNER (outermode); |
770 | poly_uint64 pos; |
771 | if (VECTOR_MODE_P (outermode) |
772 | && !MEM_P (op0) |
773 | && optab_handler (op: vec_set_optab, mode: outermode) != CODE_FOR_nothing |
774 | && fieldmode == innermode |
775 | && known_eq (bitsize, GET_MODE_BITSIZE (innermode)) |
776 | && multiple_p (a: bitnum, b: GET_MODE_BITSIZE (mode: innermode), multiple: &pos)) |
777 | { |
778 | class expand_operand ops[3]; |
779 | enum insn_code icode = optab_handler (op: vec_set_optab, mode: outermode); |
780 | |
781 | create_fixed_operand (op: &ops[0], x: op0); |
782 | create_input_operand (op: &ops[1], value, mode: innermode); |
783 | create_integer_operand (&ops[2], pos); |
784 | if (maybe_expand_insn (icode, nops: 3, ops)) |
785 | return true; |
786 | } |
787 | |
788 | /* If the target is a register, overwriting the entire object, or storing |
789 | a full-word or multi-word field can be done with just a SUBREG. */ |
790 | if (!MEM_P (op0) |
791 | && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))) |
792 | { |
793 | /* Use the subreg machinery either to narrow OP0 to the required |
794 | words or to cope with mode punning between equal-sized modes. |
795 | In the latter case, use subreg on the rhs side, not lhs. */ |
796 | rtx sub; |
797 | poly_uint64 bytenum; |
798 | poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0)); |
799 | if (known_eq (bitnum, 0U) |
800 | && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0)))) |
801 | { |
802 | sub = simplify_gen_subreg (GET_MODE (op0), op: value, innermode: fieldmode, byte: 0); |
803 | if (sub) |
804 | { |
805 | if (reverse) |
806 | sub = flip_storage_order (GET_MODE (op0), x: sub); |
807 | emit_move_insn (op0, sub); |
808 | return true; |
809 | } |
810 | } |
811 | else if (multiple_p (a: bitnum, BITS_PER_UNIT, multiple: &bytenum) |
812 | && (undefined_p |
813 | || (multiple_p (a: bitnum, b: regsize * BITS_PER_UNIT) |
814 | && multiple_p (a: bitsize, b: regsize * BITS_PER_UNIT))) |
815 | && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize)) |
816 | { |
817 | sub = simplify_gen_subreg (outermode: fieldmode, op: op0, GET_MODE (op0), byte: bytenum); |
818 | if (sub) |
819 | { |
820 | if (reverse) |
821 | value = flip_storage_order (mode: fieldmode, x: value); |
822 | emit_move_insn (sub, value); |
823 | return true; |
824 | } |
825 | } |
826 | } |
827 | |
828 | /* If the target is memory, storing any naturally aligned field can be |
829 | done with a simple store. For targets that support fast unaligned |
830 | memory, any naturally sized, unit aligned field can be done directly. */ |
831 | poly_uint64 bytenum; |
832 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode: fieldmode, bytenum: &bytenum)) |
833 | { |
834 | op0 = adjust_bitfield_address (op0, fieldmode, bytenum); |
835 | if (reverse) |
836 | value = flip_storage_order (mode: fieldmode, x: value); |
837 | emit_move_insn (op0, value); |
838 | return true; |
839 | } |
840 | |
841 | /* It's possible we'll need to handle other cases here for |
842 | polynomial bitnum and bitsize. */ |
843 | |
844 | /* From here on we need to be looking at a fixed-size insertion. */ |
845 | unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant (); |
846 | unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant (); |
847 | |
848 | /* Make sure we are playing with integral modes. Pun with subregs |
849 | if we aren't. This must come after the entire register case above, |
850 | since that case is valid for any mode. The following cases are only |
851 | valid for integral modes. */ |
852 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |
853 | scalar_int_mode imode; |
854 | if (!op0_mode.exists (mode: &imode) || imode != GET_MODE (op0)) |
855 | { |
856 | if (MEM_P (op0)) |
857 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |
858 | 0, MEM_SIZE (op0)); |
859 | else if (!op0_mode.exists ()) |
860 | { |
861 | if (ibitnum == 0 |
862 | && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0))) |
863 | && MEM_P (value) |
864 | && !reverse) |
865 | { |
866 | value = adjust_address (value, GET_MODE (op0), 0); |
867 | emit_move_insn (op0, value); |
868 | return true; |
869 | } |
870 | if (!fallback_p) |
871 | return false; |
872 | rtx temp = assign_stack_temp (GET_MODE (op0), |
873 | GET_MODE_SIZE (GET_MODE (op0))); |
874 | emit_move_insn (temp, op0); |
875 | store_bit_field_1 (str_rtx: temp, bitsize, bitnum, bitregion_start: 0, bitregion_end: 0, fieldmode, value, |
876 | reverse, fallback_p, undefined_p); |
877 | emit_move_insn (op0, temp); |
878 | return true; |
879 | } |
880 | else |
881 | op0 = gen_lowpart (op0_mode.require (), op0); |
882 | } |
883 | |
884 | return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum, |
885 | bitregion_start, bitregion_end, |
886 | fieldmode, value, reverse, fallback_p); |
887 | } |
888 | |
889 | /* Subroutine of store_bit_field_1, with the same arguments, except |
890 | that BITSIZE and BITNUM are constant. Handle cases specific to |
891 | integral modes. If OP0_MODE is defined, it is the mode of OP0, |
892 | otherwise OP0 is a BLKmode MEM. */ |
893 | |
894 | static bool |
895 | store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
896 | unsigned HOST_WIDE_INT bitsize, |
897 | unsigned HOST_WIDE_INT bitnum, |
898 | poly_uint64 bitregion_start, |
899 | poly_uint64 bitregion_end, |
900 | machine_mode fieldmode, |
901 | rtx value, bool reverse, bool fallback_p) |
902 | { |
903 | /* Storing an lsb-aligned field in a register |
904 | can be done with a movstrict instruction. */ |
905 | |
906 | if (!MEM_P (op0) |
907 | && !reverse |
908 | && lowpart_bit_field_p (bitnum, bitsize, struct_mode: op0_mode.require ()) |
909 | && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)) |
910 | && optab_handler (op: movstrict_optab, mode: fieldmode) != CODE_FOR_nothing) |
911 | { |
912 | class expand_operand ops[2]; |
913 | enum insn_code icode = optab_handler (op: movstrict_optab, mode: fieldmode); |
914 | rtx arg0 = op0; |
915 | unsigned HOST_WIDE_INT subreg_off; |
916 | |
917 | if (GET_CODE (arg0) == SUBREG) |
918 | { |
919 | /* Else we've got some float mode source being extracted into |
920 | a different float mode destination -- this combination of |
921 | subregs results in Severe Tire Damage. */ |
922 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
923 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
924 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); |
925 | arg0 = SUBREG_REG (arg0); |
926 | } |
927 | |
928 | subreg_off = bitnum / BITS_PER_UNIT; |
929 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off) |
930 | /* STRICT_LOW_PART must have a non-paradoxical subreg as |
931 | operand. */ |
932 | && !paradoxical_subreg_p (outermode: fieldmode, GET_MODE (arg0))) |
933 | { |
934 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); |
935 | |
936 | create_fixed_operand (op: &ops[0], x: arg0); |
937 | /* Shrink the source operand to FIELDMODE. */ |
938 | create_convert_operand_to (op: &ops[1], value, mode: fieldmode, unsigned_p: false); |
939 | if (maybe_expand_insn (icode, nops: 2, ops)) |
940 | return true; |
941 | } |
942 | } |
943 | |
944 | /* Handle fields bigger than a word. */ |
945 | |
946 | if (bitsize > BITS_PER_WORD) |
947 | { |
948 | /* Here we transfer the words of the field |
949 | in the order least significant first. |
950 | This is because the most significant word is the one which may |
951 | be less than full. |
952 | However, only do that if the value is not BLKmode. */ |
953 | |
954 | const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
955 | const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
956 | rtx_insn *last; |
957 | |
958 | /* This is the mode we must force value to, so that there will be enough |
959 | subwords to extract. Note that fieldmode will often (always?) be |
960 | VOIDmode, because that is what store_field uses to indicate that this |
961 | is a bit field, but passing VOIDmode to operand_subword_force |
962 | is not allowed. |
963 | |
964 | The mode must be fixed-size, since insertions into variable-sized |
965 | objects are meant to be handled before calling this function. */ |
966 | fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value)); |
967 | if (value_mode == VOIDmode) |
968 | value_mode = smallest_int_mode_for_size (size: nwords * BITS_PER_WORD); |
969 | |
970 | last = get_last_insn (); |
971 | for (int i = 0; i < nwords; i++) |
972 | { |
973 | /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD |
974 | except maybe for the last iteration. */ |
975 | const unsigned HOST_WIDE_INT new_bitsize |
976 | = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); |
977 | /* Bit offset from the starting bit number in the target. */ |
978 | const unsigned int bit_offset |
979 | = backwards ^ reverse |
980 | ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0) |
981 | : i * BITS_PER_WORD; |
982 | /* Starting word number in the value. */ |
983 | const unsigned int wordnum |
984 | = backwards |
985 | ? GET_MODE_SIZE (mode: value_mode) / UNITS_PER_WORD - (i + 1) |
986 | : i; |
987 | /* The chunk of the value in word_mode. We use bit-field extraction |
988 | in BLKmode to handle unaligned memory references and to shift the |
989 | last chunk right on big-endian machines if need be. */ |
990 | rtx value_word |
991 | = fieldmode == BLKmode |
992 | ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD, |
993 | 1, NULL_RTX, word_mode, word_mode, false, |
994 | NULL) |
995 | : operand_subword_force (value, wordnum, value_mode); |
996 | |
997 | if (!store_bit_field_1 (str_rtx: op0, bitsize: new_bitsize, |
998 | bitnum: bitnum + bit_offset, |
999 | bitregion_start, bitregion_end, |
1000 | fieldmode: word_mode, |
1001 | value: value_word, reverse, fallback_p, undefined_p: false)) |
1002 | { |
1003 | delete_insns_since (last); |
1004 | return false; |
1005 | } |
1006 | } |
1007 | return true; |
1008 | } |
1009 | |
1010 | /* If VALUE has a floating-point or complex mode, access it as an |
1011 | integer of the corresponding size. This can occur on a machine |
1012 | with 64 bit registers that uses SFmode for float. It can also |
1013 | occur for unaligned float or complex fields. */ |
1014 | rtx orig_value = value; |
1015 | scalar_int_mode value_mode; |
1016 | if (GET_MODE (value) == VOIDmode) |
1017 | /* By this point we've dealt with values that are bigger than a word, |
1018 | so word_mode is a conservatively correct choice. */ |
1019 | value_mode = word_mode; |
1020 | else if (!is_a <scalar_int_mode> (GET_MODE (value), result: &value_mode)) |
1021 | { |
1022 | value_mode = int_mode_for_mode (GET_MODE (value)).require (); |
1023 | value = gen_reg_rtx (value_mode); |
1024 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); |
1025 | } |
1026 | |
1027 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1028 | If the region spans two words, defer to store_split_bit_field. |
1029 | Don't do this if op0 is a single hard register wider than word |
1030 | such as a float or vector register. */ |
1031 | if (!MEM_P (op0) |
1032 | && GET_MODE_SIZE (mode: op0_mode.require ()) > UNITS_PER_WORD |
1033 | && (!REG_P (op0) |
1034 | || !HARD_REGISTER_P (op0) |
1035 | || hard_regno_nregs (REGNO (op0), mode: op0_mode.require ()) != 1)) |
1036 | { |
1037 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
1038 | { |
1039 | if (!fallback_p) |
1040 | return false; |
1041 | |
1042 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1043 | bitregion_start, bitregion_end, |
1044 | value, value_mode, reverse); |
1045 | return true; |
1046 | } |
1047 | op0 = simplify_gen_subreg (outermode: word_mode, op: op0, innermode: op0_mode.require (), |
1048 | byte: bitnum / BITS_PER_WORD * UNITS_PER_WORD); |
1049 | gcc_assert (op0); |
1050 | op0_mode = word_mode; |
1051 | bitnum %= BITS_PER_WORD; |
1052 | } |
1053 | |
1054 | /* From here on we can assume that the field to be stored in fits |
1055 | within a word. If the destination is a register, it too fits |
1056 | in a word. */ |
1057 | |
1058 | extraction_insn insv; |
1059 | if (!MEM_P (op0) |
1060 | && !reverse |
1061 | && get_best_reg_extraction_insn (&insv, EP_insv, |
1062 | GET_MODE_BITSIZE (mode: op0_mode.require ()), |
1063 | fieldmode) |
1064 | && store_bit_field_using_insv (insv: &insv, op0, op0_mode, |
1065 | bitsize, bitnum, value, value_mode)) |
1066 | return true; |
1067 | |
1068 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1069 | cheap register alternative is available. */ |
1070 | if (MEM_P (op0) && !reverse) |
1071 | { |
1072 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |
1073 | fieldmode) |
1074 | && store_bit_field_using_insv (insv: &insv, op0, op0_mode, |
1075 | bitsize, bitnum, value, value_mode)) |
1076 | return true; |
1077 | |
1078 | rtx_insn *last = get_last_insn (); |
1079 | |
1080 | /* Try loading part of OP0 into a register, inserting the bitfield |
1081 | into that, and then copying the result back to OP0. */ |
1082 | unsigned HOST_WIDE_INT bitpos; |
1083 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern: EP_insv, op0, bitsize, bitnum, |
1084 | bitregion_start, bitregion_end, |
1085 | fieldmode, new_bitnum: &bitpos); |
1086 | if (xop0) |
1087 | { |
1088 | rtx tempreg = copy_to_reg (xop0); |
1089 | if (store_bit_field_1 (str_rtx: tempreg, bitsize, bitnum: bitpos, |
1090 | bitregion_start, bitregion_end, |
1091 | fieldmode, value: orig_value, reverse, fallback_p: false, undefined_p: false)) |
1092 | { |
1093 | emit_move_insn (xop0, tempreg); |
1094 | return true; |
1095 | } |
1096 | delete_insns_since (last); |
1097 | } |
1098 | } |
1099 | |
1100 | if (!fallback_p) |
1101 | return false; |
1102 | |
1103 | store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start, |
1104 | bitregion_end, value, value_mode, reverse); |
1105 | return true; |
1106 | } |
1107 | |
1108 | /* Generate code to store value from rtx VALUE |
1109 | into a bit-field within structure STR_RTX |
1110 | containing BITSIZE bits starting at bit BITNUM. |
1111 | |
1112 | BITREGION_START is bitpos of the first bitfield in this region. |
1113 | BITREGION_END is the bitpos of the ending bitfield in this region. |
1114 | These two fields are 0, if the C++ memory model does not apply, |
1115 | or we are not interested in keeping track of bitfield regions. |
1116 | |
1117 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. |
1118 | |
1119 | If REVERSE is true, the store is to be done in reverse order. |
1120 | |
1121 | If UNDEFINED_P is true then STR_RTX is currently undefined. */ |
1122 | |
1123 | void |
1124 | store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
1125 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1126 | machine_mode fieldmode, |
1127 | rtx value, bool reverse, bool undefined_p) |
1128 | { |
1129 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
1130 | unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0; |
1131 | scalar_int_mode int_mode; |
1132 | if (bitsize.is_constant (const_value: &ibitsize) |
1133 | && bitnum.is_constant (const_value: &ibitnum) |
1134 | && is_a <scalar_int_mode> (m: fieldmode, result: &int_mode) |
1135 | && strict_volatile_bitfield_p (op0: str_rtx, bitsize: ibitsize, bitnum: ibitnum, fieldmode: int_mode, |
1136 | bitregion_start, bitregion_end)) |
1137 | { |
1138 | /* Storing of a full word can be done with a simple store. |
1139 | We know here that the field can be accessed with one single |
1140 | instruction. For targets that support unaligned memory, |
1141 | an unaligned access may be necessary. */ |
1142 | if (ibitsize == GET_MODE_BITSIZE (mode: int_mode)) |
1143 | { |
1144 | str_rtx = adjust_bitfield_address (str_rtx, int_mode, |
1145 | ibitnum / BITS_PER_UNIT); |
1146 | if (reverse) |
1147 | value = flip_storage_order (mode: int_mode, x: value); |
1148 | gcc_assert (ibitnum % BITS_PER_UNIT == 0); |
1149 | emit_move_insn (str_rtx, value); |
1150 | } |
1151 | else |
1152 | { |
1153 | rtx temp; |
1154 | |
1155 | str_rtx = narrow_bit_field_mem (mem: str_rtx, mode: int_mode, bitsize: ibitsize, |
1156 | bitnum: ibitnum, new_bitnum: &ibitnum); |
1157 | gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode)); |
1158 | temp = copy_to_reg (str_rtx); |
1159 | if (!store_bit_field_1 (str_rtx: temp, bitsize: ibitsize, bitnum: ibitnum, bitregion_start: 0, bitregion_end: 0, |
1160 | fieldmode: int_mode, value, reverse, fallback_p: true, undefined_p)) |
1161 | gcc_unreachable (); |
1162 | |
1163 | emit_move_insn (str_rtx, temp); |
1164 | } |
1165 | |
1166 | return; |
1167 | } |
1168 | |
1169 | /* Under the C++0x memory model, we must not touch bits outside the |
1170 | bit region. Adjust the address to start at the beginning of the |
1171 | bit region. */ |
1172 | if (MEM_P (str_rtx) && maybe_ne (a: bitregion_start, b: 0U)) |
1173 | { |
1174 | scalar_int_mode best_mode; |
1175 | machine_mode addr_mode = VOIDmode; |
1176 | |
1177 | poly_uint64 offset = exact_div (a: bitregion_start, BITS_PER_UNIT); |
1178 | bitnum -= bitregion_start; |
1179 | poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize); |
1180 | bitregion_end -= bitregion_start; |
1181 | bitregion_start = 0; |
1182 | if (bitsize.is_constant (const_value: &ibitsize) |
1183 | && bitnum.is_constant (const_value: &ibitnum) |
1184 | && get_best_mode (ibitsize, ibitnum, |
1185 | bitregion_start, bitregion_end, |
1186 | MEM_ALIGN (str_rtx), INT_MAX, |
1187 | MEM_VOLATILE_P (str_rtx), &best_mode)) |
1188 | addr_mode = best_mode; |
1189 | str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode, |
1190 | offset, size); |
1191 | } |
1192 | |
1193 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, |
1194 | bitregion_start, bitregion_end, |
1195 | fieldmode, value, reverse, fallback_p: true, undefined_p)) |
1196 | gcc_unreachable (); |
1197 | } |
1198 | |
1199 | /* Use shifts and boolean operations to store VALUE into a bit field of |
1200 | width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined, |
1201 | it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is |
1202 | the mode of VALUE. |
1203 | |
1204 | If REVERSE is true, the store is to be done in reverse order. */ |
1205 | |
1206 | static void |
1207 | store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
1208 | unsigned HOST_WIDE_INT bitsize, |
1209 | unsigned HOST_WIDE_INT bitnum, |
1210 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1211 | rtx value, scalar_int_mode value_mode, bool reverse) |
1212 | { |
1213 | /* There is a case not handled here: |
1214 | a structure with a known alignment of just a halfword |
1215 | and a field split across two aligned halfwords within the structure. |
1216 | Or likewise a structure with a known alignment of just a byte |
1217 | and a field split across two bytes. |
1218 | Such cases are not supposed to be able to occur. */ |
1219 | |
1220 | scalar_int_mode best_mode; |
1221 | if (MEM_P (op0)) |
1222 | { |
1223 | unsigned int max_bitsize = BITS_PER_WORD; |
1224 | scalar_int_mode imode; |
1225 | if (op0_mode.exists (mode: &imode) && GET_MODE_BITSIZE (mode: imode) < max_bitsize) |
1226 | max_bitsize = GET_MODE_BITSIZE (mode: imode); |
1227 | |
1228 | if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, |
1229 | MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0), |
1230 | &best_mode)) |
1231 | { |
1232 | /* The only way this should occur is if the field spans word |
1233 | boundaries. */ |
1234 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1235 | bitregion_start, bitregion_end, |
1236 | value, value_mode, reverse); |
1237 | return; |
1238 | } |
1239 | |
1240 | op0 = narrow_bit_field_mem (mem: op0, mode: best_mode, bitsize, bitnum, new_bitnum: &bitnum); |
1241 | } |
1242 | else |
1243 | best_mode = op0_mode.require (); |
1244 | |
1245 | store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum, |
1246 | value, value_mode, reverse); |
1247 | } |
1248 | |
1249 | /* Helper function for store_fixed_bit_field, stores |
1250 | the bit field always using MODE, which is the mode of OP0. The other |
1251 | arguments are as for store_fixed_bit_field. */ |
1252 | |
1253 | static void |
1254 | store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode, |
1255 | unsigned HOST_WIDE_INT bitsize, |
1256 | unsigned HOST_WIDE_INT bitnum, |
1257 | rtx value, scalar_int_mode value_mode, bool reverse) |
1258 | { |
1259 | rtx temp; |
1260 | int all_zero = 0; |
1261 | int all_one = 0; |
1262 | |
1263 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
1264 | for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ |
1265 | |
1266 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
1267 | /* BITNUM is the distance between our msb |
1268 | and that of the containing datum. |
1269 | Convert it to the distance from the lsb. */ |
1270 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |
1271 | |
1272 | /* Now BITNUM is always the distance between our lsb |
1273 | and that of OP0. */ |
1274 | |
1275 | /* Shift VALUE left by BITNUM bits. If VALUE is not constant, |
1276 | we must first convert its mode to MODE. */ |
1277 | |
1278 | if (CONST_INT_P (value)) |
1279 | { |
1280 | unsigned HOST_WIDE_INT v = UINTVAL (value); |
1281 | |
1282 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
1283 | v &= (HOST_WIDE_INT_1U << bitsize) - 1; |
1284 | |
1285 | if (v == 0) |
1286 | all_zero = 1; |
1287 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
1288 | && v == (HOST_WIDE_INT_1U << bitsize) - 1) |
1289 | || (bitsize == HOST_BITS_PER_WIDE_INT |
1290 | && v == HOST_WIDE_INT_M1U)) |
1291 | all_one = 1; |
1292 | |
1293 | value = lshift_value (mode, v, bitnum); |
1294 | } |
1295 | else |
1296 | { |
1297 | int must_and = (GET_MODE_BITSIZE (mode: value_mode) != bitsize |
1298 | && bitnum + bitsize != GET_MODE_BITSIZE (mode)); |
1299 | |
1300 | if (value_mode != mode) |
1301 | value = convert_to_mode (mode, value, 1); |
1302 | |
1303 | if (must_and) |
1304 | value = expand_binop (mode, and_optab, value, |
1305 | mask_rtx (mode, bitpos: 0, bitsize, complement: 0), |
1306 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1307 | if (bitnum > 0) |
1308 | value = expand_shift (LSHIFT_EXPR, mode, value, |
1309 | bitnum, NULL_RTX, 1); |
1310 | } |
1311 | |
1312 | if (reverse) |
1313 | value = flip_storage_order (mode, x: value); |
1314 | |
1315 | /* Now clear the chosen bits in OP0, |
1316 | except that if VALUE is -1 we need not bother. */ |
1317 | /* We keep the intermediates in registers to allow CSE to combine |
1318 | consecutive bitfield assignments. */ |
1319 | |
1320 | temp = force_reg (mode, op0); |
1321 | |
1322 | if (! all_one) |
1323 | { |
1324 | rtx mask = mask_rtx (mode, bitpos: bitnum, bitsize, complement: 1); |
1325 | if (reverse) |
1326 | mask = flip_storage_order (mode, x: mask); |
1327 | temp = expand_binop (mode, and_optab, temp, mask, |
1328 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1329 | temp = force_reg (mode, temp); |
1330 | } |
1331 | |
1332 | /* Now logical-or VALUE into OP0, unless it is zero. */ |
1333 | |
1334 | if (! all_zero) |
1335 | { |
1336 | temp = expand_binop (mode, ior_optab, temp, value, |
1337 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1338 | temp = force_reg (mode, temp); |
1339 | } |
1340 | |
1341 | if (op0 != temp) |
1342 | { |
1343 | op0 = copy_rtx (op0); |
1344 | emit_move_insn (op0, temp); |
1345 | } |
1346 | } |
1347 | |
1348 | /* Store a bit field that is split across multiple accessible memory objects. |
1349 | |
1350 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
1351 | BITSIZE is the field width; BITPOS the position of its first bit |
1352 | (within the word). |
1353 | VALUE is the value to store, which has mode VALUE_MODE. |
1354 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |
1355 | a BLKmode MEM. |
1356 | |
1357 | If REVERSE is true, the store is to be done in reverse order. |
1358 | |
1359 | This does not yet handle fields wider than BITS_PER_WORD. */ |
1360 | |
1361 | static void |
1362 | store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
1363 | unsigned HOST_WIDE_INT bitsize, |
1364 | unsigned HOST_WIDE_INT bitpos, |
1365 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1366 | rtx value, scalar_int_mode value_mode, bool reverse) |
1367 | { |
1368 | unsigned int unit, total_bits, bitsdone = 0; |
1369 | |
1370 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1371 | much at a time. */ |
1372 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
1373 | unit = BITS_PER_WORD; |
1374 | else |
1375 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
1376 | |
1377 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |
1378 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us |
1379 | again, and we will mutually recurse forever. */ |
1380 | if (MEM_P (op0) && op0_mode.exists ()) |
1381 | unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ())); |
1382 | |
1383 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1384 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note |
1385 | that VALUE might be a floating-point constant. */ |
1386 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
1387 | { |
1388 | rtx word = gen_lowpart_common (word_mode, value); |
1389 | |
1390 | if (word && (value != word)) |
1391 | value = word; |
1392 | else |
1393 | value = gen_lowpart_common (word_mode, force_reg (value_mode, value)); |
1394 | value_mode = word_mode; |
1395 | } |
1396 | |
1397 | total_bits = GET_MODE_BITSIZE (mode: value_mode); |
1398 | |
1399 | while (bitsdone < bitsize) |
1400 | { |
1401 | unsigned HOST_WIDE_INT thissize; |
1402 | unsigned HOST_WIDE_INT thispos; |
1403 | unsigned HOST_WIDE_INT offset; |
1404 | rtx part; |
1405 | |
1406 | offset = (bitpos + bitsdone) / unit; |
1407 | thispos = (bitpos + bitsdone) % unit; |
1408 | |
1409 | /* When region of bytes we can touch is restricted, decrease |
1410 | UNIT close to the end of the region as needed. If op0 is a REG |
1411 | or SUBREG of REG, don't do this, as there can't be data races |
1412 | on a register and we can expand shorter code in some cases. */ |
1413 | if (maybe_ne (a: bitregion_end, b: 0U) |
1414 | && unit > BITS_PER_UNIT |
1415 | && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1) |
1416 | && !REG_P (op0) |
1417 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) |
1418 | { |
1419 | unit = unit / 2; |
1420 | continue; |
1421 | } |
1422 | |
1423 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1424 | store_fixed_bit_field will call us again, and we will mutually |
1425 | recurse forever. */ |
1426 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
1427 | thissize = MIN (thissize, unit - thispos); |
1428 | |
1429 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
1430 | { |
1431 | /* Fetch successively less significant portions. */ |
1432 | if (CONST_INT_P (value)) |
1433 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1434 | >> (bitsize - bitsdone - thissize)) |
1435 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
1436 | /* Likewise, but the source is little-endian. */ |
1437 | else if (reverse) |
1438 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1439 | thissize, |
1440 | bitsize - bitsdone - thissize, |
1441 | NULL_RTX, 1, false); |
1442 | else |
1443 | /* The args are chosen so that the last part includes the |
1444 | lsb. Give extract_bit_field the value it needs (with |
1445 | endianness compensation) to fetch the piece we want. */ |
1446 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1447 | thissize, |
1448 | total_bits - bitsize + bitsdone, |
1449 | NULL_RTX, 1, false); |
1450 | } |
1451 | else |
1452 | { |
1453 | /* Fetch successively more significant portions. */ |
1454 | if (CONST_INT_P (value)) |
1455 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1456 | >> bitsdone) |
1457 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
1458 | /* Likewise, but the source is big-endian. */ |
1459 | else if (reverse) |
1460 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1461 | thissize, |
1462 | total_bits - bitsdone - thissize, |
1463 | NULL_RTX, 1, false); |
1464 | else |
1465 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1466 | thissize, bitsdone, NULL_RTX, |
1467 | 1, false); |
1468 | } |
1469 | |
1470 | /* If OP0 is a register, then handle OFFSET here. */ |
1471 | rtx op0_piece = op0; |
1472 | opt_scalar_int_mode op0_piece_mode = op0_mode; |
1473 | if (SUBREG_P (op0) || REG_P (op0)) |
1474 | { |
1475 | scalar_int_mode imode; |
1476 | if (op0_mode.exists (mode: &imode) |
1477 | && GET_MODE_SIZE (mode: imode) < UNITS_PER_WORD) |
1478 | { |
1479 | if (offset) |
1480 | op0_piece = const0_rtx; |
1481 | } |
1482 | else |
1483 | { |
1484 | op0_piece = operand_subword_force (op0, |
1485 | offset * unit / BITS_PER_WORD, |
1486 | GET_MODE (op0)); |
1487 | op0_piece_mode = word_mode; |
1488 | } |
1489 | offset &= BITS_PER_WORD / unit - 1; |
1490 | } |
1491 | |
1492 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |
1493 | it is just an out-of-bounds access. Ignore it. */ |
1494 | if (op0_piece != const0_rtx) |
1495 | store_fixed_bit_field (op0: op0_piece, op0_mode: op0_piece_mode, bitsize: thissize, |
1496 | bitnum: offset * unit + thispos, bitregion_start, |
1497 | bitregion_end, value: part, value_mode: word_mode, reverse); |
1498 | bitsdone += thissize; |
1499 | } |
1500 | } |
1501 | |
1502 | /* A subroutine of extract_bit_field_1 that converts return value X |
1503 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments |
1504 | to extract_bit_field. */ |
1505 | |
1506 | static rtx |
1507 | (rtx x, machine_mode mode, |
1508 | machine_mode tmode, bool unsignedp) |
1509 | { |
1510 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) |
1511 | return x; |
1512 | |
1513 | /* If the x mode is not a scalar integral, first convert to the |
1514 | integer mode of that size and then access it as a floating-point |
1515 | value via a SUBREG. */ |
1516 | if (!SCALAR_INT_MODE_P (tmode)) |
1517 | { |
1518 | scalar_int_mode int_mode = int_mode_for_mode (tmode).require (); |
1519 | x = convert_to_mode (int_mode, x, unsignedp); |
1520 | x = force_reg (int_mode, x); |
1521 | return gen_lowpart (tmode, x); |
1522 | } |
1523 | |
1524 | return convert_to_mode (tmode, x, unsignedp); |
1525 | } |
1526 | |
1527 | /* Try to use an ext(z)v pattern to extract a field from OP0. |
1528 | Return the extracted value on success, otherwise return null. |
1529 | EXTV describes the extraction instruction to use. If OP0_MODE |
1530 | is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM. |
1531 | The other arguments are as for extract_bit_field. */ |
1532 | |
1533 | static rtx |
1534 | (const extraction_insn *extv, rtx op0, |
1535 | opt_scalar_int_mode op0_mode, |
1536 | unsigned HOST_WIDE_INT bitsize, |
1537 | unsigned HOST_WIDE_INT bitnum, |
1538 | int unsignedp, rtx target, |
1539 | machine_mode mode, machine_mode tmode) |
1540 | { |
1541 | class expand_operand ops[4]; |
1542 | rtx spec_target = target; |
1543 | rtx spec_target_subreg = 0; |
1544 | scalar_int_mode ext_mode = extv->field_mode; |
1545 | unsigned unit = GET_MODE_BITSIZE (mode: ext_mode); |
1546 | |
1547 | if (bitsize == 0 || unit < bitsize) |
1548 | return NULL_RTX; |
1549 | |
1550 | if (MEM_P (op0)) |
1551 | /* Get a reference to the first byte of the field. */ |
1552 | op0 = narrow_bit_field_mem (mem: op0, mode: extv->struct_mode, bitsize, bitnum, |
1553 | new_bitnum: &bitnum); |
1554 | else |
1555 | { |
1556 | /* Convert from counting within OP0 to counting in EXT_MODE. */ |
1557 | if (BYTES_BIG_ENDIAN) |
1558 | bitnum += unit - GET_MODE_BITSIZE (mode: op0_mode.require ()); |
1559 | |
1560 | /* If op0 is a register, we need it in EXT_MODE to make it |
1561 | acceptable to the format of ext(z)v. */ |
1562 | if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode) |
1563 | return NULL_RTX; |
1564 | if (REG_P (op0) && op0_mode.require () != ext_mode) |
1565 | op0 = gen_lowpart_SUBREG (ext_mode, op0); |
1566 | } |
1567 | |
1568 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
1569 | "backwards" from the size of the unit we are extracting from. |
1570 | Otherwise, we count bits from the most significant on a |
1571 | BYTES/BITS_BIG_ENDIAN machine. */ |
1572 | |
1573 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
1574 | bitnum = unit - bitsize - bitnum; |
1575 | |
1576 | if (target == 0) |
1577 | target = spec_target = gen_reg_rtx (tmode); |
1578 | |
1579 | if (GET_MODE (target) != ext_mode) |
1580 | { |
1581 | rtx temp; |
1582 | /* Don't use LHS paradoxical subreg if explicit truncation is needed |
1583 | between the mode of the extraction (word_mode) and the target |
1584 | mode. Instead, create a temporary and use convert_move to set |
1585 | the target. */ |
1586 | if (REG_P (target) |
1587 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode) |
1588 | && (temp = gen_lowpart_if_possible (ext_mode, target))) |
1589 | { |
1590 | target = temp; |
1591 | if (partial_subreg_p (GET_MODE (spec_target), innermode: ext_mode)) |
1592 | spec_target_subreg = target; |
1593 | } |
1594 | else |
1595 | target = gen_reg_rtx (ext_mode); |
1596 | } |
1597 | |
1598 | create_output_operand (op: &ops[0], x: target, mode: ext_mode); |
1599 | create_fixed_operand (op: &ops[1], x: op0); |
1600 | create_integer_operand (&ops[2], bitsize); |
1601 | create_integer_operand (&ops[3], bitnum); |
1602 | if (maybe_expand_insn (icode: extv->icode, nops: 4, ops)) |
1603 | { |
1604 | target = ops[0].value; |
1605 | if (target == spec_target) |
1606 | return target; |
1607 | if (target == spec_target_subreg) |
1608 | return spec_target; |
1609 | return convert_extracted_bit_field (x: target, mode, tmode, unsignedp); |
1610 | } |
1611 | return NULL_RTX; |
1612 | } |
1613 | |
1614 | /* See whether it would be valid to extract the part of OP0 with |
1615 | mode OP0_MODE described by BITNUM and BITSIZE into a value of |
1616 | mode MODE using a subreg operation. |
1617 | Return the subreg if so, otherwise return null. */ |
1618 | |
1619 | static rtx |
1620 | (machine_mode mode, rtx op0, |
1621 | machine_mode op0_mode, |
1622 | poly_uint64 bitsize, poly_uint64 bitnum) |
1623 | { |
1624 | poly_uint64 bytenum; |
1625 | if (multiple_p (a: bitnum, BITS_PER_UNIT, multiple: &bytenum) |
1626 | && known_eq (bitsize, GET_MODE_BITSIZE (mode)) |
1627 | && lowpart_bit_field_p (bitnum, bitsize, struct_mode: op0_mode) |
1628 | && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode)) |
1629 | return simplify_gen_subreg (outermode: mode, op: op0, innermode: op0_mode, byte: bytenum); |
1630 | return NULL_RTX; |
1631 | } |
1632 | |
1633 | /* A subroutine of extract_bit_field, with the same arguments. |
1634 | If UNSIGNEDP is -1, the result need not be sign or zero extended. |
1635 | If FALLBACK_P is true, fall back to extract_fixed_bit_field |
1636 | if we can find no other means of implementing the operation. |
1637 | if FALLBACK_P is false, return NULL instead. */ |
1638 | |
1639 | static rtx |
1640 | (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
1641 | int unsignedp, rtx target, machine_mode mode, |
1642 | machine_mode tmode, bool reverse, bool fallback_p, |
1643 | rtx *alt_rtl) |
1644 | { |
1645 | rtx op0 = str_rtx; |
1646 | machine_mode mode1; |
1647 | |
1648 | if (tmode == VOIDmode) |
1649 | tmode = mode; |
1650 | |
1651 | while (GET_CODE (op0) == SUBREG) |
1652 | { |
1653 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
1654 | op0 = SUBREG_REG (op0); |
1655 | } |
1656 | |
1657 | /* If we have an out-of-bounds access to a register, just return an |
1658 | uninitialized register of the required mode. This can occur if the |
1659 | source code contains an out-of-bounds access to a small array. */ |
1660 | if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0)))) |
1661 | return gen_reg_rtx (tmode); |
1662 | |
1663 | if (REG_P (op0) |
1664 | && mode == GET_MODE (op0) |
1665 | && known_eq (bitnum, 0U) |
1666 | && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0)))) |
1667 | { |
1668 | if (reverse) |
1669 | op0 = flip_storage_order (mode, x: op0); |
1670 | /* We're trying to extract a full register from itself. */ |
1671 | return op0; |
1672 | } |
1673 | |
1674 | /* First try to check for vector from vector extractions. */ |
1675 | if (VECTOR_MODE_P (GET_MODE (op0)) |
1676 | && !MEM_P (op0) |
1677 | && VECTOR_MODE_P (tmode) |
1678 | && known_eq (bitsize, GET_MODE_BITSIZE (tmode)) |
1679 | && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode))) |
1680 | { |
1681 | machine_mode new_mode = GET_MODE (op0); |
1682 | if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) |
1683 | { |
1684 | scalar_mode inner_mode = GET_MODE_INNER (tmode); |
1685 | poly_uint64 nunits; |
1686 | if (!multiple_p (a: GET_MODE_BITSIZE (GET_MODE (op0)), |
1687 | GET_MODE_UNIT_BITSIZE (tmode), multiple: &nunits) |
1688 | || !related_vector_mode (tmode, inner_mode, |
1689 | nunits).exists (mode: &new_mode) |
1690 | || maybe_ne (a: GET_MODE_SIZE (mode: new_mode), |
1691 | b: GET_MODE_SIZE (GET_MODE (op0)))) |
1692 | new_mode = VOIDmode; |
1693 | } |
1694 | poly_uint64 pos; |
1695 | if (new_mode != VOIDmode |
1696 | && (convert_optab_handler (op: vec_extract_optab, to_mode: new_mode, from_mode: tmode) |
1697 | != CODE_FOR_nothing) |
1698 | && multiple_p (a: bitnum, b: GET_MODE_BITSIZE (mode: tmode), multiple: &pos)) |
1699 | { |
1700 | class expand_operand ops[3]; |
1701 | machine_mode outermode = new_mode; |
1702 | machine_mode innermode = tmode; |
1703 | enum insn_code icode |
1704 | = convert_optab_handler (op: vec_extract_optab, to_mode: outermode, from_mode: innermode); |
1705 | |
1706 | if (new_mode != GET_MODE (op0)) |
1707 | op0 = gen_lowpart (new_mode, op0); |
1708 | create_output_operand (op: &ops[0], x: target, mode: innermode); |
1709 | ops[0].target = 1; |
1710 | create_input_operand (op: &ops[1], value: op0, mode: outermode); |
1711 | create_integer_operand (&ops[2], pos); |
1712 | if (maybe_expand_insn (icode, nops: 3, ops)) |
1713 | { |
1714 | if (alt_rtl && ops[0].target) |
1715 | *alt_rtl = target; |
1716 | target = ops[0].value; |
1717 | if (GET_MODE (target) != mode) |
1718 | return gen_lowpart (tmode, target); |
1719 | return target; |
1720 | } |
1721 | } |
1722 | } |
1723 | |
1724 | /* See if we can get a better vector mode before extracting. */ |
1725 | if (VECTOR_MODE_P (GET_MODE (op0)) |
1726 | && !MEM_P (op0) |
1727 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) |
1728 | { |
1729 | machine_mode new_mode; |
1730 | |
1731 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) |
1732 | new_mode = MIN_MODE_VECTOR_FLOAT; |
1733 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1734 | new_mode = MIN_MODE_VECTOR_FRACT; |
1735 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) |
1736 | new_mode = MIN_MODE_VECTOR_UFRACT; |
1737 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) |
1738 | new_mode = MIN_MODE_VECTOR_ACCUM; |
1739 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) |
1740 | new_mode = MIN_MODE_VECTOR_UACCUM; |
1741 | else |
1742 | new_mode = MIN_MODE_VECTOR_INT; |
1743 | |
1744 | FOR_EACH_MODE_FROM (new_mode, new_mode) |
1745 | if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0))) |
1746 | && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode)) |
1747 | && targetm.vector_mode_supported_p (new_mode) |
1748 | && targetm.modes_tieable_p (GET_MODE (op0), new_mode)) |
1749 | break; |
1750 | if (new_mode != VOIDmode) |
1751 | op0 = gen_lowpart (new_mode, op0); |
1752 | } |
1753 | |
1754 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1755 | available. If that fails, see whether the current modes and bitregion |
1756 | give a natural subreg. */ |
1757 | machine_mode outermode = GET_MODE (op0); |
1758 | if (VECTOR_MODE_P (outermode) && !MEM_P (op0)) |
1759 | { |
1760 | scalar_mode innermode = GET_MODE_INNER (outermode); |
1761 | enum insn_code icode |
1762 | = convert_optab_handler (op: vec_extract_optab, to_mode: outermode, from_mode: innermode); |
1763 | poly_uint64 pos; |
1764 | if (icode != CODE_FOR_nothing |
1765 | && known_eq (bitsize, GET_MODE_BITSIZE (innermode)) |
1766 | && multiple_p (a: bitnum, b: GET_MODE_BITSIZE (mode: innermode), multiple: &pos)) |
1767 | { |
1768 | class expand_operand ops[3]; |
1769 | |
1770 | create_output_operand (op: &ops[0], x: target, mode: innermode); |
1771 | ops[0].target = 1; |
1772 | create_input_operand (op: &ops[1], value: op0, mode: outermode); |
1773 | create_integer_operand (&ops[2], pos); |
1774 | if (maybe_expand_insn (icode, nops: 3, ops)) |
1775 | { |
1776 | if (alt_rtl && ops[0].target) |
1777 | *alt_rtl = target; |
1778 | target = ops[0].value; |
1779 | if (GET_MODE (target) != mode) |
1780 | return gen_lowpart (tmode, target); |
1781 | return target; |
1782 | } |
1783 | } |
1784 | /* Using subregs is useful if we're extracting one register vector |
1785 | from a multi-register vector. extract_bit_field_as_subreg checks |
1786 | for valid bitsize and bitnum, so we don't need to do that here. */ |
1787 | if (VECTOR_MODE_P (mode)) |
1788 | { |
1789 | rtx sub = extract_bit_field_as_subreg (mode, op0, op0_mode: outermode, |
1790 | bitsize, bitnum); |
1791 | if (sub) |
1792 | return sub; |
1793 | } |
1794 | } |
1795 | |
1796 | /* Make sure we are playing with integral modes. Pun with subregs |
1797 | if we aren't. */ |
1798 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |
1799 | scalar_int_mode imode; |
1800 | if (!op0_mode.exists (mode: &imode) || imode != GET_MODE (op0)) |
1801 | { |
1802 | if (MEM_P (op0)) |
1803 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |
1804 | 0, MEM_SIZE (op0)); |
1805 | else if (op0_mode.exists (mode: &imode)) |
1806 | { |
1807 | op0 = gen_lowpart (imode, op0); |
1808 | |
1809 | /* If we got a SUBREG, force it into a register since we |
1810 | aren't going to be able to do another SUBREG on it. */ |
1811 | if (GET_CODE (op0) == SUBREG) |
1812 | op0 = force_reg (imode, op0); |
1813 | } |
1814 | else |
1815 | { |
1816 | poly_int64 size = GET_MODE_SIZE (GET_MODE (op0)); |
1817 | rtx mem = assign_stack_temp (GET_MODE (op0), size); |
1818 | emit_move_insn (mem, op0); |
1819 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |
1820 | } |
1821 | } |
1822 | |
1823 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1824 | If that's wrong, the solution is to test for it and set TARGET to 0 |
1825 | if needed. */ |
1826 | |
1827 | /* Get the mode of the field to use for atomic access or subreg |
1828 | conversion. */ |
1829 | if (!SCALAR_INT_MODE_P (tmode) |
1830 | || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (mode: &mode1)) |
1831 | mode1 = mode; |
1832 | gcc_assert (mode1 != BLKmode); |
1833 | |
1834 | /* Extraction of a full MODE1 value can be done with a subreg as long |
1835 | as the least significant bit of the value is the least significant |
1836 | bit of either OP0 or a word of OP0. */ |
1837 | if (!MEM_P (op0) && !reverse && op0_mode.exists (mode: &imode)) |
1838 | { |
1839 | rtx sub = extract_bit_field_as_subreg (mode: mode1, op0, op0_mode: imode, |
1840 | bitsize, bitnum); |
1841 | if (sub) |
1842 | return convert_extracted_bit_field (x: sub, mode, tmode, unsignedp); |
1843 | } |
1844 | |
1845 | /* Extraction of a full MODE1 value can be done with a load as long as |
1846 | the field is on a byte boundary and is sufficiently aligned. */ |
1847 | poly_uint64 bytenum; |
1848 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode: mode1, bytenum: &bytenum)) |
1849 | { |
1850 | op0 = adjust_bitfield_address (op0, mode1, bytenum); |
1851 | if (reverse) |
1852 | op0 = flip_storage_order (mode: mode1, x: op0); |
1853 | return convert_extracted_bit_field (x: op0, mode, tmode, unsignedp); |
1854 | } |
1855 | |
1856 | /* If we have a memory source and a non-constant bit offset, restrict |
1857 | the memory to the referenced bytes. This is a worst-case fallback |
1858 | but is useful for things like vector booleans. */ |
1859 | if (MEM_P (op0) && !bitnum.is_constant ()) |
1860 | { |
1861 | bytenum = bits_to_bytes_round_down (bitnum); |
1862 | bitnum = num_trailing_bits (bitnum); |
1863 | poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize); |
1864 | op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize); |
1865 | op0_mode = opt_scalar_int_mode (); |
1866 | } |
1867 | |
1868 | /* It's possible we'll need to handle other cases here for |
1869 | polynomial bitnum and bitsize. */ |
1870 | |
1871 | /* From here on we need to be looking at a fixed-size insertion. */ |
1872 | return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (), |
1873 | bitnum.to_constant (), unsignedp, |
1874 | target, mode, tmode, reverse, fallback_p); |
1875 | } |
1876 | |
1877 | /* Subroutine of extract_bit_field_1, with the same arguments, except |
1878 | that BITSIZE and BITNUM are constant. Handle cases specific to |
1879 | integral modes. If OP0_MODE is defined, it is the mode of OP0, |
1880 | otherwise OP0 is a BLKmode MEM. */ |
1881 | |
1882 | static rtx |
1883 | (rtx op0, opt_scalar_int_mode op0_mode, |
1884 | unsigned HOST_WIDE_INT bitsize, |
1885 | unsigned HOST_WIDE_INT bitnum, int unsignedp, |
1886 | rtx target, machine_mode mode, machine_mode tmode, |
1887 | bool reverse, bool fallback_p) |
1888 | { |
1889 | /* Handle fields bigger than a word. */ |
1890 | |
1891 | if (bitsize > BITS_PER_WORD) |
1892 | { |
1893 | /* Here we transfer the words of the field |
1894 | in the order least significant first. |
1895 | This is because the most significant word is the one which may |
1896 | be less than full. */ |
1897 | |
1898 | const bool backwards = WORDS_BIG_ENDIAN; |
1899 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1900 | unsigned int i; |
1901 | rtx_insn *last; |
1902 | |
1903 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
1904 | target = gen_reg_rtx (mode); |
1905 | |
1906 | /* In case we're about to clobber a base register or something |
1907 | (see gcc.c-torture/execute/20040625-1.c). */ |
1908 | if (reg_mentioned_p (target, op0)) |
1909 | target = gen_reg_rtx (mode); |
1910 | |
1911 | /* Indicate for flow that the entire target reg is being set. */ |
1912 | emit_clobber (target); |
1913 | |
1914 | /* The mode must be fixed-size, since extract_bit_field_1 handles |
1915 | extractions from variable-sized objects before calling this |
1916 | function. */ |
1917 | unsigned int target_size |
1918 | = GET_MODE_SIZE (GET_MODE (target)).to_constant (); |
1919 | last = get_last_insn (); |
1920 | for (i = 0; i < nwords; i++) |
1921 | { |
1922 | /* If I is 0, use the low-order word in both field and target; |
1923 | if I is 1, use the next to lowest word; and so on. */ |
1924 | /* Word number in TARGET to use. */ |
1925 | unsigned int wordnum |
1926 | = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i); |
1927 | /* Offset from start of field in OP0. */ |
1928 | unsigned int bit_offset = (backwards ^ reverse |
1929 | ? MAX ((int) bitsize - ((int) i + 1) |
1930 | * BITS_PER_WORD, |
1931 | 0) |
1932 | : (int) i * BITS_PER_WORD); |
1933 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1934 | rtx result_part |
1935 | = extract_bit_field_1 (str_rtx: op0, MIN (BITS_PER_WORD, |
1936 | bitsize - i * BITS_PER_WORD), |
1937 | bitnum: bitnum + bit_offset, |
1938 | unsignedp: (unsignedp ? 1 : -1), target: target_part, |
1939 | mode, tmode: word_mode, reverse, fallback_p, NULL); |
1940 | |
1941 | gcc_assert (target_part); |
1942 | if (!result_part) |
1943 | { |
1944 | delete_insns_since (last); |
1945 | return NULL; |
1946 | } |
1947 | |
1948 | if (result_part != target_part) |
1949 | emit_move_insn (target_part, result_part); |
1950 | } |
1951 | |
1952 | if (unsignedp) |
1953 | { |
1954 | /* Unless we've filled TARGET, the upper regs in a multi-reg value |
1955 | need to be zero'd out. */ |
1956 | if (target_size > nwords * UNITS_PER_WORD) |
1957 | { |
1958 | unsigned int i, total_words; |
1959 | |
1960 | total_words = target_size / UNITS_PER_WORD; |
1961 | for (i = nwords; i < total_words; i++) |
1962 | emit_move_insn |
1963 | (operand_subword (target, |
1964 | backwards ? total_words - i - 1 : i, |
1965 | 1, VOIDmode), |
1966 | const0_rtx); |
1967 | } |
1968 | return target; |
1969 | } |
1970 | |
1971 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1972 | target = expand_shift (LSHIFT_EXPR, mode, target, |
1973 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
1974 | return expand_shift (RSHIFT_EXPR, mode, target, |
1975 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
1976 | } |
1977 | |
1978 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1979 | If the region spans two words, defer to extract_split_bit_field. */ |
1980 | if (!MEM_P (op0) && GET_MODE_SIZE (mode: op0_mode.require ()) > UNITS_PER_WORD) |
1981 | { |
1982 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
1983 | { |
1984 | if (!fallback_p) |
1985 | return NULL_RTX; |
1986 | target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1987 | unsignedp, reverse); |
1988 | return convert_extracted_bit_field (x: target, mode, tmode, unsignedp); |
1989 | } |
1990 | /* If OP0 is a hard register, copy it to a pseudo before calling |
1991 | simplify_gen_subreg. */ |
1992 | if (REG_P (op0) && HARD_REGISTER_P (op0)) |
1993 | op0 = copy_to_reg (op0); |
1994 | op0 = simplify_gen_subreg (outermode: word_mode, op: op0, innermode: op0_mode.require (), |
1995 | byte: bitnum / BITS_PER_WORD * UNITS_PER_WORD); |
1996 | op0_mode = word_mode; |
1997 | bitnum %= BITS_PER_WORD; |
1998 | } |
1999 | |
2000 | /* From here on we know the desired field is smaller than a word. |
2001 | If OP0 is a register, it too fits within a word. */ |
2002 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
2003 | extraction_insn extv; |
2004 | if (!MEM_P (op0) |
2005 | && !reverse |
2006 | /* ??? We could limit the structure size to the part of OP0 that |
2007 | contains the field, with appropriate checks for endianness |
2008 | and TARGET_TRULY_NOOP_TRUNCATION. */ |
2009 | && get_best_reg_extraction_insn (&extv, pattern, |
2010 | GET_MODE_BITSIZE (mode: op0_mode.require ()), |
2011 | tmode)) |
2012 | { |
2013 | rtx result = extract_bit_field_using_extv (extv: &extv, op0, op0_mode, |
2014 | bitsize, bitnum, |
2015 | unsignedp, target, mode, |
2016 | tmode); |
2017 | if (result) |
2018 | return result; |
2019 | } |
2020 | |
2021 | /* If OP0 is a memory, try copying it to a register and seeing if a |
2022 | cheap register alternative is available. */ |
2023 | if (MEM_P (op0) & !reverse) |
2024 | { |
2025 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
2026 | tmode)) |
2027 | { |
2028 | rtx result = extract_bit_field_using_extv (extv: &extv, op0, op0_mode, |
2029 | bitsize, bitnum, |
2030 | unsignedp, target, mode, |
2031 | tmode); |
2032 | if (result) |
2033 | return result; |
2034 | } |
2035 | |
2036 | rtx_insn *last = get_last_insn (); |
2037 | |
2038 | /* Try loading part of OP0 into a register and extracting the |
2039 | bitfield from that. */ |
2040 | unsigned HOST_WIDE_INT bitpos; |
2041 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, |
2042 | bitregion_start: 0, bitregion_end: 0, fieldmode: tmode, new_bitnum: &bitpos); |
2043 | if (xop0) |
2044 | { |
2045 | xop0 = copy_to_reg (xop0); |
2046 | rtx result = extract_bit_field_1 (str_rtx: xop0, bitsize, bitnum: bitpos, |
2047 | unsignedp, target, |
2048 | mode, tmode, reverse, fallback_p: false, NULL); |
2049 | if (result) |
2050 | return result; |
2051 | delete_insns_since (last); |
2052 | } |
2053 | } |
2054 | |
2055 | if (!fallback_p) |
2056 | return NULL; |
2057 | |
2058 | /* Find a correspondingly-sized integer field, so we can apply |
2059 | shifts and masks to it. */ |
2060 | scalar_int_mode int_mode; |
2061 | if (!int_mode_for_mode (tmode).exists (mode: &int_mode)) |
2062 | /* If this fails, we should probably push op0 out to memory and then |
2063 | do a load. */ |
2064 | int_mode = int_mode_for_mode (mode).require (); |
2065 | |
2066 | target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, |
2067 | bitnum, target, unsignedp, reverse); |
2068 | |
2069 | /* Complex values must be reversed piecewise, so we need to undo the global |
2070 | reversal, convert to the complex mode and reverse again. */ |
2071 | if (reverse && COMPLEX_MODE_P (tmode)) |
2072 | { |
2073 | target = flip_storage_order (mode: int_mode, x: target); |
2074 | target = convert_extracted_bit_field (x: target, mode, tmode, unsignedp); |
2075 | target = flip_storage_order (mode: tmode, x: target); |
2076 | } |
2077 | else |
2078 | target = convert_extracted_bit_field (x: target, mode, tmode, unsignedp); |
2079 | |
2080 | return target; |
2081 | } |
2082 | |
2083 | /* Generate code to extract a byte-field from STR_RTX |
2084 | containing BITSIZE bits, starting at BITNUM, |
2085 | and put it in TARGET if possible (if TARGET is nonzero). |
2086 | Regardless of TARGET, we return the rtx for where the value is placed. |
2087 | |
2088 | STR_RTX is the structure containing the byte (a REG or MEM). |
2089 | UNSIGNEDP is nonzero if this is an unsigned bit field. |
2090 | MODE is the natural mode of the field value once extracted. |
2091 | TMODE is the mode the caller would like the value to have; |
2092 | but the value may be returned with type MODE instead. |
2093 | |
2094 | If REVERSE is true, the extraction is to be done in reverse order. |
2095 | |
2096 | If a TARGET is specified and we can store in it at no extra cost, |
2097 | we do so, and return TARGET. |
2098 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred |
2099 | if they are equally easy. |
2100 | |
2101 | If the result can be stored at TARGET, and ALT_RTL is non-NULL, |
2102 | then *ALT_RTL is set to TARGET (before legitimziation). */ |
2103 | |
2104 | rtx |
2105 | (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
2106 | int unsignedp, rtx target, machine_mode mode, |
2107 | machine_mode tmode, bool reverse, rtx *alt_rtl) |
2108 | { |
2109 | machine_mode mode1; |
2110 | |
2111 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
2112 | if (maybe_ne (a: GET_MODE_BITSIZE (GET_MODE (str_rtx)), b: 0)) |
2113 | mode1 = GET_MODE (str_rtx); |
2114 | else if (target && maybe_ne (a: GET_MODE_BITSIZE (GET_MODE (target)), b: 0)) |
2115 | mode1 = GET_MODE (target); |
2116 | else |
2117 | mode1 = tmode; |
2118 | |
2119 | unsigned HOST_WIDE_INT ibitsize, ibitnum; |
2120 | scalar_int_mode int_mode; |
2121 | if (bitsize.is_constant (const_value: &ibitsize) |
2122 | && bitnum.is_constant (const_value: &ibitnum) |
2123 | && is_a <scalar_int_mode> (m: mode1, result: &int_mode) |
2124 | && strict_volatile_bitfield_p (op0: str_rtx, bitsize: ibitsize, bitnum: ibitnum, |
2125 | fieldmode: int_mode, bitregion_start: 0, bitregion_end: 0)) |
2126 | { |
2127 | /* Extraction of a full INT_MODE value can be done with a simple load. |
2128 | We know here that the field can be accessed with one single |
2129 | instruction. For targets that support unaligned memory, |
2130 | an unaligned access may be necessary. */ |
2131 | if (ibitsize == GET_MODE_BITSIZE (mode: int_mode)) |
2132 | { |
2133 | rtx result = adjust_bitfield_address (str_rtx, int_mode, |
2134 | ibitnum / BITS_PER_UNIT); |
2135 | if (reverse) |
2136 | result = flip_storage_order (mode: int_mode, x: result); |
2137 | gcc_assert (ibitnum % BITS_PER_UNIT == 0); |
2138 | return convert_extracted_bit_field (x: result, mode, tmode, unsignedp); |
2139 | } |
2140 | |
2141 | str_rtx = narrow_bit_field_mem (mem: str_rtx, mode: int_mode, bitsize: ibitsize, bitnum: ibitnum, |
2142 | new_bitnum: &ibitnum); |
2143 | gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode)); |
2144 | str_rtx = copy_to_reg (str_rtx); |
2145 | return extract_bit_field_1 (str_rtx, bitsize: ibitsize, bitnum: ibitnum, unsignedp, |
2146 | target, mode, tmode, reverse, fallback_p: true, alt_rtl); |
2147 | } |
2148 | |
2149 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |
2150 | target, mode, tmode, reverse, fallback_p: true, alt_rtl); |
2151 | } |
2152 | |
2153 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |
2154 | from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0, |
2155 | otherwise OP0 is a BLKmode MEM. |
2156 | |
2157 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). |
2158 | If REVERSE is true, the extraction is to be done in reverse order. |
2159 | |
2160 | If TARGET is nonzero, attempts to store the value there |
2161 | and return TARGET, but this is not guaranteed. |
2162 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
2163 | |
2164 | static rtx |
2165 | (machine_mode tmode, rtx op0, |
2166 | opt_scalar_int_mode op0_mode, |
2167 | unsigned HOST_WIDE_INT bitsize, |
2168 | unsigned HOST_WIDE_INT bitnum, rtx target, |
2169 | int unsignedp, bool reverse) |
2170 | { |
2171 | scalar_int_mode mode; |
2172 | if (MEM_P (op0)) |
2173 | { |
2174 | if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), |
2175 | BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode)) |
2176 | /* The only way this should occur is if the field spans word |
2177 | boundaries. */ |
2178 | return extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
2179 | unsignedp, reverse); |
2180 | |
2181 | op0 = narrow_bit_field_mem (mem: op0, mode, bitsize, bitnum, new_bitnum: &bitnum); |
2182 | } |
2183 | else |
2184 | mode = op0_mode.require (); |
2185 | |
2186 | return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum, |
2187 | target, unsignedp, reverse); |
2188 | } |
2189 | |
2190 | /* Helper function for extract_fixed_bit_field, extracts |
2191 | the bit field always using MODE, which is the mode of OP0. |
2192 | If UNSIGNEDP is -1, the result need not be sign or zero extended. |
2193 | The other arguments are as for extract_fixed_bit_field. */ |
2194 | |
2195 | static rtx |
2196 | (machine_mode tmode, rtx op0, scalar_int_mode mode, |
2197 | unsigned HOST_WIDE_INT bitsize, |
2198 | unsigned HOST_WIDE_INT bitnum, rtx target, |
2199 | int unsignedp, bool reverse) |
2200 | { |
2201 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
2202 | for invalid input, such as extract equivalent of f5 from |
2203 | gcc.dg/pr48335-2.c. */ |
2204 | |
2205 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
2206 | /* BITNUM is the distance between our msb and that of OP0. |
2207 | Convert it to the distance from the lsb. */ |
2208 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |
2209 | |
2210 | /* Now BITNUM is always the distance between the field's lsb and that of OP0. |
2211 | We have reduced the big-endian case to the little-endian case. */ |
2212 | if (reverse) |
2213 | op0 = flip_storage_order (mode, x: op0); |
2214 | |
2215 | if (unsignedp) |
2216 | { |
2217 | if (bitnum) |
2218 | { |
2219 | /* If the field does not already start at the lsb, |
2220 | shift it so it does. */ |
2221 | /* Maybe propagate the target for the shift. */ |
2222 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
2223 | if (tmode != mode) |
2224 | subtarget = 0; |
2225 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); |
2226 | } |
2227 | /* Convert the value to the desired mode. TMODE must also be a |
2228 | scalar integer for this conversion to make sense, since we |
2229 | shouldn't reinterpret the bits. */ |
2230 | scalar_int_mode new_mode = as_a <scalar_int_mode> (m: tmode); |
2231 | if (mode != new_mode) |
2232 | op0 = convert_to_mode (new_mode, op0, 1); |
2233 | |
2234 | /* Unless the msb of the field used to be the msb when we shifted, |
2235 | mask out the upper bits. */ |
2236 | |
2237 | if (GET_MODE_BITSIZE (mode) != bitnum + bitsize |
2238 | && unsignedp != -1) |
2239 | return expand_binop (new_mode, and_optab, op0, |
2240 | mask_rtx (mode: new_mode, bitpos: 0, bitsize, complement: 0), |
2241 | target, 1, OPTAB_LIB_WIDEN); |
2242 | return op0; |
2243 | } |
2244 | |
2245 | /* To extract a signed bit-field, first shift its msb to the msb of the word, |
2246 | then arithmetic-shift its lsb to the lsb of the word. */ |
2247 | op0 = force_reg (mode, op0); |
2248 | |
2249 | /* Find the narrowest integer mode that contains the field. */ |
2250 | |
2251 | opt_scalar_int_mode mode_iter; |
2252 | FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |
2253 | if (GET_MODE_BITSIZE (mode: mode_iter.require ()) >= bitsize + bitnum) |
2254 | break; |
2255 | |
2256 | mode = mode_iter.require (); |
2257 | op0 = convert_to_mode (mode, op0, 0); |
2258 | |
2259 | if (mode != tmode) |
2260 | target = 0; |
2261 | |
2262 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) |
2263 | { |
2264 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); |
2265 | /* Maybe propagate the target for the shift. */ |
2266 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
2267 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
2268 | } |
2269 | |
2270 | return expand_shift (RSHIFT_EXPR, mode, op0, |
2271 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |
2272 | } |
2273 | |
2274 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value |
2275 | VALUE << BITPOS. */ |
2276 | |
2277 | static rtx |
2278 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |
2279 | int bitpos) |
2280 | { |
2281 | return immed_wide_int_const (wi::lshift (x: value, y: bitpos), mode); |
2282 | } |
2283 | |
2284 | /* Extract a bit field that is split across two words |
2285 | and return an RTX for the result. |
2286 | |
2287 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. |
2288 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. |
2289 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. |
2290 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |
2291 | a BLKmode MEM. |
2292 | |
2293 | If REVERSE is true, the extraction is to be done in reverse order. */ |
2294 | |
2295 | static rtx |
2296 | (rtx op0, opt_scalar_int_mode op0_mode, |
2297 | unsigned HOST_WIDE_INT bitsize, |
2298 | unsigned HOST_WIDE_INT bitpos, int unsignedp, |
2299 | bool reverse) |
2300 | { |
2301 | unsigned int unit; |
2302 | unsigned int bitsdone = 0; |
2303 | rtx result = NULL_RTX; |
2304 | int first = 1; |
2305 | |
2306 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
2307 | much at a time. */ |
2308 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
2309 | unit = BITS_PER_WORD; |
2310 | else |
2311 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
2312 | |
2313 | while (bitsdone < bitsize) |
2314 | { |
2315 | unsigned HOST_WIDE_INT thissize; |
2316 | rtx part; |
2317 | unsigned HOST_WIDE_INT thispos; |
2318 | unsigned HOST_WIDE_INT offset; |
2319 | |
2320 | offset = (bitpos + bitsdone) / unit; |
2321 | thispos = (bitpos + bitsdone) % unit; |
2322 | |
2323 | /* THISSIZE must not overrun a word boundary. Otherwise, |
2324 | extract_fixed_bit_field will call us again, and we will mutually |
2325 | recurse forever. */ |
2326 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
2327 | thissize = MIN (thissize, unit - thispos); |
2328 | |
2329 | /* If OP0 is a register, then handle OFFSET here. */ |
2330 | rtx op0_piece = op0; |
2331 | opt_scalar_int_mode op0_piece_mode = op0_mode; |
2332 | if (SUBREG_P (op0) || REG_P (op0)) |
2333 | { |
2334 | op0_piece = operand_subword_force (op0, offset, op0_mode.require ()); |
2335 | op0_piece_mode = word_mode; |
2336 | offset = 0; |
2337 | } |
2338 | |
2339 | /* Extract the parts in bit-counting order, |
2340 | whose meaning is determined by BYTES_PER_UNIT. |
2341 | OFFSET is in UNITs, and UNIT is in bits. */ |
2342 | part = extract_fixed_bit_field (tmode: word_mode, op0: op0_piece, op0_mode: op0_piece_mode, |
2343 | bitsize: thissize, bitnum: offset * unit + thispos, |
2344 | target: 0, unsignedp: 1, reverse); |
2345 | bitsdone += thissize; |
2346 | |
2347 | /* Shift this part into place for the result. */ |
2348 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
2349 | { |
2350 | if (bitsize != bitsdone) |
2351 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |
2352 | bitsize - bitsdone, 0, 1); |
2353 | } |
2354 | else |
2355 | { |
2356 | if (bitsdone != thissize) |
2357 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |
2358 | bitsdone - thissize, 0, 1); |
2359 | } |
2360 | |
2361 | if (first) |
2362 | result = part; |
2363 | else |
2364 | /* Combine the parts with bitwise or. This works |
2365 | because we extracted each part as an unsigned bit field. */ |
2366 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, |
2367 | OPTAB_LIB_WIDEN); |
2368 | |
2369 | first = 0; |
2370 | } |
2371 | |
2372 | /* Unsigned bit field: we are done. */ |
2373 | if (unsignedp) |
2374 | return result; |
2375 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
2376 | result = expand_shift (LSHIFT_EXPR, word_mode, result, |
2377 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
2378 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
2379 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
2380 | } |
2381 | |
2382 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2383 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than |
2384 | MODE, fill the upper bits with zeros. Fail if the layout of either |
2385 | mode is unknown (as for CC modes) or if the extraction would involve |
2386 | unprofitable mode punning. Return the value on success, otherwise |
2387 | return null. |
2388 | |
2389 | This is different from gen_lowpart* in these respects: |
2390 | |
2391 | - the returned value must always be considered an rvalue |
2392 | |
2393 | - when MODE is wider than SRC_MODE, the extraction involves |
2394 | a zero extension |
2395 | |
2396 | - when MODE is smaller than SRC_MODE, the extraction involves |
2397 | a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION). |
2398 | |
2399 | In other words, this routine performs a computation, whereas the |
2400 | gen_lowpart* routines are conceptually lvalue or rvalue subreg |
2401 | operations. */ |
2402 | |
2403 | rtx |
2404 | (machine_mode mode, machine_mode src_mode, rtx src) |
2405 | { |
2406 | scalar_int_mode int_mode, src_int_mode; |
2407 | |
2408 | if (mode == src_mode) |
2409 | return src; |
2410 | |
2411 | if (CONSTANT_P (src)) |
2412 | { |
2413 | /* simplify_gen_subreg can't be used here, as if simplify_subreg |
2414 | fails, it will happily create (subreg (symbol_ref)) or similar |
2415 | invalid SUBREGs. */ |
2416 | poly_uint64 byte = subreg_lowpart_offset (outermode: mode, innermode: src_mode); |
2417 | rtx ret = simplify_subreg (outermode: mode, op: src, innermode: src_mode, byte); |
2418 | if (ret) |
2419 | return ret; |
2420 | |
2421 | if (GET_MODE (src) == VOIDmode |
2422 | || !validate_subreg (mode, src_mode, src, byte)) |
2423 | return NULL_RTX; |
2424 | |
2425 | src = force_reg (GET_MODE (src), src); |
2426 | return gen_rtx_SUBREG (mode, src, byte); |
2427 | } |
2428 | |
2429 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) |
2430 | return NULL_RTX; |
2431 | |
2432 | if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode)) |
2433 | && targetm.modes_tieable_p (mode, src_mode)) |
2434 | { |
2435 | rtx x = gen_lowpart_common (mode, src); |
2436 | if (x) |
2437 | return x; |
2438 | } |
2439 | |
2440 | if (!int_mode_for_mode (src_mode).exists (mode: &src_int_mode) |
2441 | || !int_mode_for_mode (mode).exists (mode: &int_mode)) |
2442 | return NULL_RTX; |
2443 | |
2444 | if (!targetm.modes_tieable_p (src_int_mode, src_mode)) |
2445 | return NULL_RTX; |
2446 | if (!targetm.modes_tieable_p (int_mode, mode)) |
2447 | return NULL_RTX; |
2448 | |
2449 | src = gen_lowpart (src_int_mode, src); |
2450 | if (!validate_subreg (int_mode, src_int_mode, src, |
2451 | subreg_lowpart_offset (outermode: int_mode, innermode: src_int_mode))) |
2452 | return NULL_RTX; |
2453 | |
2454 | src = convert_modes (mode: int_mode, oldmode: src_int_mode, x: src, unsignedp: true); |
2455 | src = gen_lowpart (mode, src); |
2456 | return src; |
2457 | } |
2458 | |
2459 | /* Add INC into TARGET. */ |
2460 | |
2461 | void |
2462 | expand_inc (rtx target, rtx inc) |
2463 | { |
2464 | rtx value = expand_binop (GET_MODE (target), add_optab, |
2465 | target, inc, |
2466 | target, 0, OPTAB_LIB_WIDEN); |
2467 | if (value != target) |
2468 | emit_move_insn (target, value); |
2469 | } |
2470 | |
2471 | /* Subtract DEC from TARGET. */ |
2472 | |
2473 | void |
2474 | expand_dec (rtx target, rtx dec) |
2475 | { |
2476 | rtx value = expand_binop (GET_MODE (target), sub_optab, |
2477 | target, dec, |
2478 | target, 0, OPTAB_LIB_WIDEN); |
2479 | if (value != target) |
2480 | emit_move_insn (target, value); |
2481 | } |
2482 | |
2483 | /* Output a shift instruction for expression code CODE, |
2484 | with SHIFTED being the rtx for the value to shift, |
2485 | and AMOUNT the rtx for the amount to shift by. |
2486 | Store the result in the rtx TARGET, if that is convenient. |
2487 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2488 | Return the rtx for where the value is. |
2489 | If that cannot be done, abort the compilation unless MAY_FAIL is true, |
2490 | in which case 0 is returned. */ |
2491 | |
static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  /* For vector shifts, bit counts and costs are per element.  */
  scalar_mode scalar_mode = GET_MODE_INNER (mode);
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* The target truncates shift counts, so we may reduce a constant
	 count modulo the bitsize, or look through a lowpart SUBREG of
	 a variable count.  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode: scalar_mode)))
	op1 = gen_int_shift_amount (mode,
				    (unsigned HOST_WIDE_INT) INTVAL (op1)
				    % GET_MODE_BITSIZE (mode: scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  We may canonicalize
     to reduce the immediate or if the ISA can rotate by constants
     in only one direction.  */
  if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
    {
      /* Rotating left by N is the same as rotating right by
	 BITSIZE - N, and vice versa.  */
      op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (mode: scalar_mode)
					 - INTVAL (op1)));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (mode: scalar_mode) == 2
      && optab_handler (op: bswap_optab, mode) != CODE_FOR_nothing)
    return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);

  /* A shift or rotate of zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (mode: scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      /* Each addition doubles the value, i.e. shifts left by one.  */
      int i;
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try progressively more expensive expansion strategies: a direct
     insn, then widening, then a library call.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		return shifted;
	      else if (CONST_INT_P (op1))
		other_amount = gen_int_shift_amount
		  (mode, GET_MODE_BITSIZE (mode: scalar_mode) - INTVAL (op1));
	      else
		{
		  /* Variable amount: compute (-N) & (C - 1) in rtl, which
		     also keeps the complementary count in range.  */
		  other_amount
		    = simplify_gen_unary (code: NEG, GET_MODE (op1),
					  op: op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (mode: scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (code: AND, GET_MODE (op1), op0: other_amount,
					   op1: gen_int_mode (mask, GET_MODE (op1)));
		}

	      /* SHIFTED is read twice below; make sure it is stable.  */
	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (code: left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, amount: new_amount, target: 0, unsignedp: 1);
	      temp1 = expand_shift_1 (code: left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, amount: other_amount,
				      target: subtarget, unsignedp: 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  /* Failure is only tolerated when the caller passed MAY_FAIL.  */
  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
2689 | |
2690 | /* Output a shift instruction for expression code CODE, |
2691 | with SHIFTED being the rtx for the value to shift, |
2692 | and AMOUNT the amount to shift by. |
2693 | Store the result in the rtx TARGET, if that is convenient. |
2694 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2695 | Return the rtx for where the value is. */ |
2696 | |
2697 | rtx |
2698 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2699 | poly_int64 amount, rtx target, int unsignedp) |
2700 | { |
2701 | return expand_shift_1 (code, mode, shifted, |
2702 | amount: gen_int_shift_amount (mode, amount), |
2703 | target, unsignedp); |
2704 | } |
2705 | |
2706 | /* Likewise, but return 0 if that cannot be done. */ |
2707 | |
2708 | rtx |
2709 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2710 | int amount, rtx target, int unsignedp) |
2711 | { |
2712 | return expand_shift_1 (code, mode, |
2713 | shifted, GEN_INT (amount), target, unsignedp, may_fail: true); |
2714 | } |
2715 | |
2716 | /* Output a shift instruction for expression code CODE, |
2717 | with SHIFTED being the rtx for the value to shift, |
2718 | and AMOUNT the tree for the amount to shift by. |
2719 | Store the result in the rtx TARGET, if that is convenient. |
2720 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2721 | Return the rtx for where the value is. */ |
2722 | |
2723 | rtx |
2724 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2725 | tree amount, rtx target, int unsignedp) |
2726 | { |
2727 | return expand_shift_1 (code, mode, |
2728 | shifted, amount: expand_normal (exp: amount), target, unsignedp); |
2729 | } |
2730 | |
2731 | |
/* Forward declarations for the constant-multiplication synthesis and
   highpart-multiply helpers defined below.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, machine_mode mode);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (scalar_int_mode, rtx);
static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
				       int, int);
2741 | /* Compute and return the best algorithm for multiplying by T. |
2742 | The algorithm must cost less than cost_limit |
2743 | If retval.cost >= COST_LIMIT, no algorithm was found and all |
   other fields of the returned struct are undefined.
2745 | MODE is the machine mode of the multiplication. */ |
2746 | |
static void
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
	    const struct mult_cost *cost_limit, machine_mode mode)
{
  int m;
  struct algorithm *alg_in, *best_alg;
  struct mult_cost best_cost;
  struct mult_cost new_limit;
  int op_cost, op_latency;
  unsigned HOST_WIDE_INT orig_t = t;
  unsigned HOST_WIDE_INT q;
  int maxm, hash_index;
  bool cache_hit = false;
  enum alg_code cache_alg = alg_zero;
  bool speed = optimize_insn_for_speed_p ();
  scalar_int_mode imode;
  struct alg_hash_entry *entry_ptr;

  /* Indicate that no algorithm is yet found.  If no algorithm
     is found, this value will be returned and indicate failure.  */
  alg_out->cost.cost = cost_limit->cost + 1;
  alg_out->cost.latency = cost_limit->latency + 1;

  if (cost_limit->cost < 0
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
    return;

  /* Be prepared for vector modes.  */
  imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));

  maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));

  /* Restrict the bits of "t" to the multiplication's mode.  */
  t &= GET_MODE_MASK (imode);

  /* t == 1 can be done in zero cost.  */
  if (t == 1)
    {
      alg_out->ops = 1;
      alg_out->cost.cost = 0;
      alg_out->cost.latency = 0;
      alg_out->op[0] = alg_m;
      return;
    }

  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
  if (t == 0)
    {
      if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
	return;
      else
	{
	  alg_out->ops = 1;
	  alg_out->cost.cost = zero_cost (speed);
	  alg_out->cost.latency = zero_cost (speed);
	  alg_out->op[0] = alg_zero;
	  return;
	}
    }

  /* We'll be needing a couple extra algorithm structures now.  */

  alg_in = XALLOCA (struct algorithm);
  best_alg = XALLOCA (struct algorithm);
  best_cost = *cost_limit;

  /* Compute the hash index.  */
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;

  /* See if we already know what to do for T.  */
  entry_ptr = alg_hash_entry_ptr (idx: hash_index);
  if (entry_ptr->t == t
      && entry_ptr->mode == mode
      && entry_ptr->speed == speed
      && entry_ptr->alg != alg_unknown)
    {
      cache_alg = entry_ptr->alg;

      if (cache_alg == alg_impossible)
	{
	  /* The cache tells us that it's impossible to synthesize
	     multiplication by T within entry_ptr->cost.  */
	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
	    /* COST_LIMIT is at least as restrictive as the one
	       recorded in the hash table, in which case we have no
	       hope of synthesizing a multiplication.  Just
	       return.  */
	    return;

	  /* If we get here, COST_LIMIT is less restrictive than the
	     one recorded in the hash table, so we may be able to
	     synthesize a multiplication.  Proceed as if we didn't
	     have the cache entry.  */
	}
      else
	{
	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
	    /* The cached algorithm shows that this multiplication
	       requires more cost than COST_LIMIT.  Just return.  This
	       way, we don't clobber this cache entry with
	       alg_impossible but retain useful information.  */
	    return;

	  cache_hit = true;

	  /* Jump straight to the cached winning strategy; the
	     "if (cache_hit) goto done;" checks below stop us from
	     re-exploring the other strategies.  */
	  switch (cache_alg)
	    {
	    case alg_shift:
	      goto do_alg_shift;

	    case alg_add_t_m2:
	    case alg_sub_t_m2:
	      goto do_alg_addsub_t_m2;

	    case alg_add_factor:
	    case alg_sub_factor:
	      goto do_alg_addsub_factor;

	    case alg_add_t2_m:
	      goto do_alg_add_t2_m;

	    case alg_sub_t2_m:
	      goto do_alg_sub_t2_m;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
    do_alg_shift:
      m = ctz_or_zero (x: t); /* m = number of low zero bits */
      if (m < maxm)
	{
	  q = t >> m;
	  /* The function expand_shift will choose between a shift and
	     a sequence of additions, so the observed cost is given as
	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
	  op_cost = m * add_cost (speed, mode);
	  if (shift_cost (speed, mode, bits: m) < op_cost)
	    op_cost = shift_cost (speed, mode, bits: m);
	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_cost;
	  synth_mult (alg_out: alg_in, t: q, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_shift;
	    }

	  /* See if treating ORIG_T as a signed number yields a better
	     sequence.  Try this sequence only for a negative ORIG_T
	     as it would be useless for a non-negative ORIG_T.  */
	  if ((HOST_WIDE_INT) orig_t < 0)
	    {
	      /* Shift ORIG_T as follows because a right shift of a
		 negative-valued signed type is implementation
		 defined.  */
	      q = ~(~orig_t >> m);
	      /* The function expand_shift will choose between a shift
		 and a sequence of additions, so the observed cost is
		 given as MIN (m * add_cost(speed, mode),
		 shift_cost(speed, mode, m)).  */
	      op_cost = m * add_cost (speed, mode);
	      if (shift_cost (speed, mode, bits: m) < op_cost)
		op_cost = shift_cost (speed, mode, bits: m);
	      new_limit.cost = best_cost.cost - op_cost;
	      new_limit.latency = best_cost.latency - op_cost;
	      synth_mult (alg_out: alg_in, t: q, cost_limit: &new_limit, mode);

	      alg_in->cost.cost += op_cost;
	      alg_in->cost.latency += op_cost;
	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
		{
		  best_cost = alg_in->cost;
		  std::swap (a&: alg_in, b&: best_alg);
		  best_alg->log[best_alg->ops] = m;
		  best_alg->op[best_alg->ops] = alg_shift;
		}
	    }
	}
      if (cache_hit)
	goto done;
    }

  /* If we have an odd number, add or subtract one.  */
  if ((t & 1) != 0)
    {
      unsigned HOST_WIDE_INT w;

    do_alg_addsub_t_m2:
      /* Find the lowest zero bit: W ends up as the lowest clear bit
	 of T (or 0 if T is all ones).  */
      for (w = 1; (w & t) != 0; w <<= 1)
	;
      /* If T was -1, then W will be zero after the loop.  This is another
	 case where T ends with ...111.  Handling this with (T + 1) and
	 subtract 1 produces slightly better code and results in algorithm
	 selection much faster than treating it like the ...0111 case
	 below.  */
      if (w == 0
	  || (w > 2
	      /* Reject the case where t is 3.
		 Thus we prefer addition in that case.  */
	      && t != 3))
	{
	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */

	  op_cost = add_cost (speed, mode);
	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_cost;
	  synth_mult (alg_out: alg_in, t: t + 1, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = 0;
	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
	    }
	}
      else
	{
	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */

	  op_cost = add_cost (speed, mode);
	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_cost;
	  synth_mult (alg_out: alg_in, t: t - 1, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = 0;
	      best_alg->op[best_alg->ops] = alg_add_t_m2;
	    }
	}

      /* We may be able to calculate a * -7, a * -15, a * -31, etc
	 quickly with a - a * n for some appropriate constant n.  */
      m = exact_log2 (x: -orig_t + 1);
      if (m >= 0 && m < maxm)
	{
	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
	  /* If the target has a cheap shift-and-subtract insn use
	     that in preference to a shift insn followed by a sub insn.
	     Assume that the shift-and-sub is "atomic" with a latency
	     equal to its cost, otherwise assume that on superscalar
	     hardware the shift may be executed concurrently with the
	     earlier steps in the algorithm.  */
	  if (shiftsub1_cost (speed, mode, bits: m) <= op_cost)
	    {
	      op_cost = shiftsub1_cost (speed, mode, bits: m);
	      op_latency = op_cost;
	    }
	  else
	    op_latency = add_cost (speed, mode);

	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_latency;
	  synth_mult (alg_out: alg_in, t: (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
		      cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_latency;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
	    }
	}

      if (cache_hit)
	goto done;
    }

  /* Look for factors of t of the form
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
     If we find such a factor, we can multiply by t using an algorithm that
     multiplies by q, shift the result by m and add/subtract it to itself.

     We search for large factors first and loop down, even if large factors
     are less probable than small; if we find a large factor we will find a
     good sequence quickly, and therefore be able to prune (by decreasing
     COST_LIMIT) the search.  */

 do_alg_addsub_factor:
  for (m = floor_log2 (x: t - 1); m >= 2; m--)
    {
      unsigned HOST_WIDE_INT d;

      /* First try a factor of the form 2**m + 1.  */
      d = (HOST_WIDE_INT_1U << m) + 1;
      if (t % d == 0 && t > d && m < maxm
	  && (!cache_hit || cache_alg == alg_add_factor))
	{
	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
	  if (shiftadd_cost (speed, mode, bits: m) <= op_cost)
	    op_cost = shiftadd_cost (speed, mode, bits: m);

	  op_latency = op_cost;


	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_latency;
	  synth_mult (alg_out: alg_in, t: t / d, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_latency;
	  if (alg_in->cost.latency < op_cost)
	    alg_in->cost.latency = op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_add_factor;
	    }
	  /* Other factors will have been taken care of in the recursion.  */
	  break;
	}

      /* Then try a factor of the form 2**m - 1.  */
      d = (HOST_WIDE_INT_1U << m) - 1;
      if (t % d == 0 && t > d && m < maxm
	  && (!cache_hit || cache_alg == alg_sub_factor))
	{
	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
	  if (shiftsub0_cost (speed, mode, bits: m) <= op_cost)
	    op_cost = shiftsub0_cost (speed, mode, bits: m);

	  op_latency = op_cost;

	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_latency;
	  synth_mult (alg_out: alg_in, t: t / d, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_latency;
	  if (alg_in->cost.latency < op_cost)
	    alg_in->cost.latency = op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_sub_factor;
	    }
	  break;
	}
    }
  if (cache_hit)
    goto done;

  /* Try shift-and-add (load effective address) instructions,
     i.e. do a*3, a*5, a*9.  */
  if ((t & 1) != 0)
    {
    do_alg_add_t2_m:
      q = t - 1;
      m = ctz_hwi (x: q);
      if (q && m < maxm)
	{
	  op_cost = shiftadd_cost (speed, mode, bits: m);
	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_cost;
	  synth_mult (alg_out: alg_in, t: (t - 1) >> m, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_add_t2_m;
	    }
	}
      if (cache_hit)
	goto done;

    do_alg_sub_t2_m:
      q = t + 1;
      m = ctz_hwi (x: q);
      if (q && m < maxm)
	{
	  op_cost = shiftsub0_cost (speed, mode, bits: m);
	  new_limit.cost = best_cost.cost - op_cost;
	  new_limit.latency = best_cost.latency - op_cost;
	  synth_mult (alg_out: alg_in, t: (t + 1) >> m, cost_limit: &new_limit, mode);

	  alg_in->cost.cost += op_cost;
	  alg_in->cost.latency += op_cost;
	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
	    {
	      best_cost = alg_in->cost;
	      std::swap (a&: alg_in, b&: best_alg);
	      best_alg->log[best_alg->ops] = m;
	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
	    }
	}
      if (cache_hit)
	goto done;
    }

 done:
  /* If best_cost has not decreased, we have not found any algorithm.  */
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
    {
      /* We failed to find an algorithm.  Record alg_impossible for
	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
	 we are asked to find an algorithm for T within the same or
	 lower COST_LIMIT, we can immediately return to the
	 caller.  */
      entry_ptr->t = t;
      entry_ptr->mode = mode;
      entry_ptr->speed = speed;
      entry_ptr->alg = alg_impossible;
      entry_ptr->cost = *cost_limit;
      return;
    }

  /* Cache the result.  */
  if (!cache_hit)
    {
      entry_ptr->t = t;
      entry_ptr->mode = mode;
      entry_ptr->speed = speed;
      entry_ptr->alg = best_alg->op[best_alg->ops];
      entry_ptr->cost.cost = best_cost.cost;
      entry_ptr->cost.latency = best_cost.latency;
    }

  /* If we are getting a too long sequence for `struct algorithm'
     to record, make this search fail.  */
  if (best_alg->ops == MAX_BITS_PER_WORD)
    return;

  /* Copy the algorithm from temporary space to the space at alg_out.
     We avoid using structure assignment because the majority of
     best_alg is normally undefined, and this is a critical function.  */
  alg_out->ops = best_alg->ops + 1;
  alg_out->cost = best_cost;
  memcpy (dest: alg_out->op, src: best_alg->op,
	  n: alg_out->ops * sizeof *alg_out->op);
  memcpy (dest: alg_out->log, src: best_alg->log,
	  n: alg_out->ops * sizeof *alg_out->log);
}
3209 | |
3210 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
3211 | Try three variations: |
3212 | |
3213 | - a shift/add sequence based on VAL itself |
3214 | - a shift/add sequence based on -VAL, followed by a negation |
3215 | - a shift/add sequence based on VAL - 1, followed by an addition. |
3216 | |
3217 | Return true if the cheapest of these cost less than MULT_COST, |
3218 | describing the algorithm in *ALG and final fixup in *VARIANT. */ |
3219 | |
bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* Variant 1: a shift/add sequence for VAL itself.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg_out: alg, t: val, cost_limit: &limit, mode);

  /* Variant 2: synthesize -VAL and negate the result.  This works
     only if the inverted value actually fits in an `unsigned int'.  */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      /* Budget for the -VAL synthesis: the cheaper of the current best
	 and MULT_COST, minus the cost of the final negation.  */
      op_cost = neg_cost (speed, mode);
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (alg_out: &alg2, t: -val, cost_limit: &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Variant 3: synthesize VAL - 1 and add the operand back in.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (alg_out: &alg2, t: val - 1, cost_limit: &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  /* Succeed only if the best variant actually beats MULT_COST.  */
  return MULT_COST_LESS (&alg->cost, mult_cost);
}
3290 | |
3291 | /* A subroutine of expand_mult, used for constant multiplications. |
3292 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if |
3293 | convenient. Use the shift/add sequence described by ALG and apply |
3294 | the final fixup specified by VARIANT. */ |
3295 | |
static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* VAL_SO_FAR tracks the constant multiplier the emitted sequence
     computes so far; it is checked against VAL at the end.  */
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the target of each step to
	 keep the sequence short; when optimizing, let expand pick.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log.  */
	  tem = expand_shift (code: LSHIFT_EXPR, mode, shifted: accum, amount: log, NULL_RTX, unsignedp: 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log.  */
	  tem = expand_shift (code: LSHIFT_EXPR, mode, shifted: op0, amount: log, NULL_RTX, unsignedp: 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log.  */
	  tem = expand_shift (code: LSHIFT_EXPR, mode, shifted: op0, amount: log, NULL_RTX, unsignedp: 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0.  */
	  accum = expand_shift (code: LSHIFT_EXPR, mode, shifted: accum,
			        amount: log, target: shift_subtarget, unsignedp: 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0.  */
	  accum = expand_shift (code: LSHIFT_EXPR, mode, shifted: accum,
			        amount: log, target: shift_subtarget, unsignedp: 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= (2^log + 1).  */
	  tem = expand_shift (code: LSHIFT_EXPR, mode, shifted: accum, amount: log, NULL_RTX, unsignedp: 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= (2^log - 1).  */
	  tem = expand_shift (code: LSHIFT_EXPR, mode, shifted: accum, amount: log, NULL_RTX, unsignedp: 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
	     In that case, only the low bits of accum would be guaranteed to
	     be equal to the content of the REG_EQUAL note, the upper bits
	     can be anything.  */
	  if (!paradoxical_subreg_p (x: tem))
	    {
	      insn = get_last_insn ();
	      wide_int wval_so_far
		= wi::uhwi (val: val_so_far,
			    precision: GET_MODE_PRECISION (mode: as_a <scalar_mode> (m: nmode)));
	      rtx c = immed_wide_int_const (wval_so_far, nmode);
	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
				accum_inner);
	    }
	}
    }

  /* Apply the final fixup for the negate/add variants.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3447 | |
3448 | /* Perform a multiplication and return an rtx for the result. |
3449 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |
3450 | TARGET is a suggestion for where to store the result (an rtx). |
3451 | |
3452 | We check specially for a constant integer as OP1. |
3453 | If you want this check for OP0 as well, then before calling |
3454 | you should swap the two operands if OP0 would be constant. */ |
3455 | |
rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* -ftrapv signed multiply must use the trapping optab; it also
     disables the synthetic shift/add path below.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (a&: op0, b&: op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (x: op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants: x*0, x*1 and x*-1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (op: vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (op: ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (code: LSHIFT_EXPR, mode, shifted: op0,
				 amount: shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (code: LSHIFT_EXPR, mode, shifted: op0,
			     amount: floor_log2 (x: coeff), target, unsignedp);

      /* A scratch register for costing the MULT rtx without emitting it.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed_p: speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (code: LSHIFT_EXPR, mode, shifted: op0,
				       amount: floor_log2 (x: coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, val: coeff, alg: &algorithm, variant: &variant,
				   mult_cost: max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, val: coeff, NULL_RTX,
					    alg: &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed_p: speed);
      if (choose_mult_variant (mode, val: coeff, alg: &algorithm, variant: &variant, mult_cost: max_cost))
	return expand_mult_const (mode, op0, val: coeff, target,
				  alg: &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp,
			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp,
		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
  gcc_assert (op0 || no_libcall);
  return op0;
}
3606 | |
3607 | /* Return a cost estimate for multiplying a register by the given |
3608 | COEFFicient in the given MODE and SPEED. */ |
3609 | |
3610 | int |
3611 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
3612 | { |
3613 | int max_cost; |
3614 | struct algorithm algorithm; |
3615 | enum mult_variant variant; |
3616 | |
3617 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
3618 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), |
3619 | mode, speed_p: speed); |
3620 | if (choose_mult_variant (mode, val: coeff, alg: &algorithm, variant: &variant, mult_cost: max_cost)) |
3621 | return algorithm.cost.cost; |
3622 | else |
3623 | return max_cost; |
3624 | } |
3625 | |
3626 | /* Perform a widening multiplication and return an rtx for the result. |
3627 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |
3628 | TARGET is a suggestion for where to store the result (an rtx). |
3629 | THIS_OPTAB is the optab we should use, it must be either umul_widen_optab |
3630 | or smul_widen_optab. |
3631 | |
3632 | We check specially for a constant integer as OP1, comparing the |
3633 | cost of a widening multiply against the cost of a sequence of shifts |
3634 | and adds. */ |
3635 | |
rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* Only attempt the shift/add route when OP1 converts to a CONST_INT
     in MODE and the coefficient is either non-negative or fully
     representable in a HOST_WIDE_INT for this mode.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), x: op1,
			        unsignedp: this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (code: LSHIFT_EXPR, mode, shifted: op0,
			       amount: floor_log2 (x: coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, val: coeff, alg: &algorithm, variant: &variant,
			       mult_cost: max_cost))
	{
	  /* Shift/add beats the widening multiply: widen OP0 first and
	     run the synthesized sequence in the wide mode.  */
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, val: coeff, target,
				    alg: &algorithm, variant);
	}
    }
  /* Fall back to the widening multiply optab itself.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
3681 | |
3682 | /* Choose a minimal N + 1 bit approximation to 1/D that can be used to |
3683 | replace division by D, and put the least significant N bits of the result |
3684 | in *MULTIPLIER_PTR and return the most significant bit. |
3685 | |
3686 | The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the |
3687 | needed precision is in PRECISION (should be <= N). |
3688 | |
3689 | PRECISION should be as small as possible so this function can choose |
3690 | multiplier more freely. |
3691 | |
3692 | The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that |
3693 | is to be used for a final right shift is placed in *POST_SHIFT_PTR. |
3694 | |
3695 | Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), |
3696 | where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ |
3697 | |
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (x: d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (bit: pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (x: val, y: d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (bit: pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (x: val, y: d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Halve both bounds as long as they still
     straddle distinct values, shrinking the required post-shift.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (x: mlow, bitpos: 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (x: mhigh, bitpos: 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (val: ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (val: mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* Return the low N bits of the multiplier; the return value is
	 the (N+1)-st bit.  */
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () > mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (x: mhigh, HOST_BITS_PER_WIDE_INT, width: 1);
    }
}
3753 | |
3754 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is |
3755 | congruent to 1 (mod 2**N). */ |
3756 | |
3757 | static unsigned HOST_WIDE_INT |
3758 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
3759 | { |
3760 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
3761 | |
3762 | /* The algorithm notes that the choice y = x satisfies |
3763 | x*y == 1 mod 2^3, since x is assumed odd. |
3764 | Each iteration doubles the number of bits of significance in y. */ |
3765 | |
3766 | unsigned HOST_WIDE_INT mask; |
3767 | unsigned HOST_WIDE_INT y = x; |
3768 | int nbit = 3; |
3769 | |
3770 | mask = (n == HOST_BITS_PER_WIDE_INT |
3771 | ? HOST_WIDE_INT_M1U |
3772 | : (HOST_WIDE_INT_1U << n) - 1); |
3773 | |
3774 | while (nbit < n) |
3775 | { |
3776 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ |
3777 | nbit *= 2; |
3778 | } |
3779 | return y; |
3780 | } |
3781 | |
3782 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness |
3783 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the |
3784 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product |
3785 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to |
3786 | become signed. |
3787 | |
3788 | The result is put in TARGET if that is convenient. |
3789 | |
3790 | MODE is the mode of operation. */ |
3791 | |
3792 | rtx |
3793 | expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0, |
3794 | rtx op1, rtx target, int unsignedp) |
3795 | { |
3796 | rtx tem; |
3797 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; |
3798 | |
3799 | tem = expand_shift (code: RSHIFT_EXPR, mode, shifted: op0, |
3800 | amount: GET_MODE_BITSIZE (mode) - 1, NULL_RTX, unsignedp: 0); |
3801 | tem = expand_and (mode, tem, op1, NULL_RTX); |
3802 | adj_operand |
3803 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3804 | adj_operand); |
3805 | |
3806 | tem = expand_shift (code: RSHIFT_EXPR, mode, shifted: op1, |
3807 | amount: GET_MODE_BITSIZE (mode) - 1, NULL_RTX, unsignedp: 0); |
3808 | tem = expand_and (mode, tem, op0, NULL_RTX); |
3809 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3810 | target); |
3811 | |
3812 | return target; |
3813 | } |
3814 | |
3815 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
3816 | |
3817 | static rtx |
3818 | (scalar_int_mode mode, rtx op) |
3819 | { |
3820 | if (mode == word_mode) |
3821 | return gen_highpart (mode, op); |
3822 | |
3823 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (m: mode).require (); |
3824 | |
3825 | op = expand_shift (code: RSHIFT_EXPR, mode: wider_mode, shifted: op, |
3826 | amount: GET_MODE_BITSIZE (mode), target: 0, unsignedp: 1); |
3827 | return convert_modes (mode, oldmode: wider_mode, x: op, unsignedp: 0); |
3828 | } |
3829 | |
3830 | /* Like expmed_mult_highpart, but only consider using a multiplication |
3831 | optab. OP1 is an rtx for the constant operand. */ |
3832 | |
static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (m: mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, bits: size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, adj_operand: tem, op0, op1: narrow_op1,
					    target: tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (op: moptab, to_mode: wider_mode, from_mode: mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, mode: wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, op: tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (op: smul_optab, mode: wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, mode: wider_mode) + shift_cost (speed, mode, bits: size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (mode: wider_mode, oldmode: mode, x: op0, unsignedp);
      wop1 = convert_modes (mode: wider_mode, oldmode: mode, x: op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, op: tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (op: moptab, to_mode: wider_mode, from_mode: mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, mode: wider_mode)
	  + 2 * shift_cost (speed, mode, bits: size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, op: tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, adj_operand: tem, op0, op1: narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy fits within MAX_COST.  */
  return 0;
}
3934 | |
3935 | /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), |
3936 | putting the high half of the result in TARGET if that is convenient, |
3937 | and return where the result is. If the operation cannot be performed, |
3938 | 0 is returned. |
3939 | |
3940 | MODE is the mode of operation and result. |
3941 | |
3942 | UNSIGNEDP nonzero means unsigned multiply. |
3943 | |
3944 | MAX_COST is the total allowed cost for the expanded RTL. */ |
3945 | |
3946 | static rtx |
3947 | expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, |
3948 | rtx target, int unsignedp, int max_cost) |
3949 | { |
3950 | unsigned HOST_WIDE_INT cnst1; |
3951 | int ; |
3952 | bool sign_adjust = false; |
3953 | enum mult_variant variant; |
3954 | struct algorithm alg; |
3955 | rtx tem; |
3956 | bool speed = optimize_insn_for_speed_p (); |
3957 | |
3958 | /* We can't support modes wider than HOST_BITS_PER_INT. */ |
3959 | gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); |
3960 | |
3961 | cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); |
3962 | |
3963 | /* We can't optimize modes wider than BITS_PER_WORD. |
3964 | ??? We might be able to perform double-word arithmetic if |
3965 | mode == word_mode, however all the cost calculations in |
3966 | synth_mult etc. assume single-word operations. */ |
3967 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (m: mode).require (); |
3968 | if (GET_MODE_BITSIZE (mode: wider_mode) > BITS_PER_WORD) |
3969 | return expmed_mult_highpart_optab (mode, op0, op1, target, |
3970 | unsignedp, max_cost); |
3971 | |
3972 | extra_cost = shift_cost (speed, mode, bits: GET_MODE_BITSIZE (mode) - 1); |
3973 | |
3974 | /* Check whether we try to multiply by a negative constant. */ |
3975 | if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) |
3976 | { |
3977 | sign_adjust = true; |
3978 | extra_cost += add_cost (speed, mode); |
3979 | } |
3980 | |
3981 | /* See whether shift/add multiplication is cheap enough. */ |
3982 | if (choose_mult_variant (mode: wider_mode, val: cnst1, alg: &alg, variant: &variant, |
3983 | mult_cost: max_cost - extra_cost)) |
3984 | { |
3985 | /* See whether the specialized multiplication optabs are |
3986 | cheaper than the shift/add version. */ |
3987 | tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, |
3988 | max_cost: alg.cost.cost + extra_cost); |
3989 | if (tem) |
3990 | return tem; |
3991 | |
3992 | tem = convert_to_mode (wider_mode, op0, unsignedp); |
3993 | tem = expand_mult_const (mode: wider_mode, op0: tem, val: cnst1, target: 0, alg: &alg, variant); |
3994 | tem = extract_high_half (mode, op: tem); |
3995 | |
3996 | /* Adjust result for signedness. */ |
3997 | if (sign_adjust) |
3998 | tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); |
3999 | |
4000 | return tem; |
4001 | } |
4002 | return expmed_mult_highpart_optab (mode, op0, op1, target, |
4003 | unsignedp, max_cost); |
4004 | } |
4005 | |
4006 | |
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   D is a positive power of two; the remainder follows truncating
   division semantics, i.e. it has the same sign as OP0.  Returns an
   rtx holding the result (not necessarily the RESULT register).  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  /* LOGD is the position of the single set bit in D.  */
  logd = floor_log2 (x: d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is -1 when OP0 < 0 and 0 otherwise.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* MASKLOW selects the low LOGD bits, i.e. equals D - 1.  */
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (op: lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (x: temp, mode, speed_p: optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* (OP0 ^ SIGNMASK) - SIGNMASK conditionally negates OP0,
		 yielding its absolute value; mask the low bits, then
		 apply the same XOR/SUB pair again to restore the
		 original sign.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Logically shifting the all-ones SIGNMASK right by
		 BITSIZE - LOGD leaves just the low LOGD bits, giving a
		 bias of D - 1 when OP0 < 0 and 0 otherwise; then
		 ((OP0 + bias) & masklow) - bias is the truncating
		 remainder.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (width: logd, negate_p: false, precision: prec);
  mask = wi::set_bit (x: mask, bit: prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* RESULT is OP0 & (signbit | masklow), so its sign bit equals OP0's.
     If it is nonnegative, OP0 >= 0 and the low bits already are the
     remainder; skip the fixup.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* OP0 < 0: turn the low-bits value K into K - D (0 stays 0).  The
     SUB 1 / IOR high-mask / ADD 1 sequence handles K == 0 without a
     second branch.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (width: logd, negate_p: true, precision: prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
4102 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  The result
   rounds toward zero (TRUNC_DIV semantics): for negative OP0 a bias
   of D - 1 is added before the arithmetic right shift by LOGD.  */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (x: d);

  /* For D == 2 the bias is simply the sign condition itself:
     (OP0 + (OP0 < 0)) >> 1.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      if (temp != NULL_RTX)
	{
	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			       0, OPTAB_LIB_WIDEN);
	  return expand_shift (code: RSHIFT_EXPR, mode, shifted: temp, amount: logd, NULL_RTX, unsignedp: 0);
	}
    }

  /* Try a conditional move selecting between OP0 and OP0 + D - 1.  */
  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      /* Emit into a pending sequence so the insns can be discarded if
	 the conditional move cannot be generated.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, { .code: LT, .op0: temp2, const0_rtx, .mode: mode },
				     temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (code: RSHIFT_EXPR, mode, shifted: temp2, amount: logd, NULL_RTX, unsignedp: 0);
	}
      end_sequence ();
    }

  /* Branch-free variant: TEMP = OP0 < 0 ? -1 : 0, then derive the
     bias D - 1 from it either by an AND with D - 1 or by a logical
     right shift of the all-ones mask, whichever is cheaper.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (temp != NULL_RTX)
	{
	  /* Prefer the AND form for multiword modes or when the shift
	     costs more than one instruction.  */
	  if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	      || shift_cost (speed: optimize_insn_for_speed_p (), mode, bits: ushift)
		 > COSTS_N_INSNS (1))
	    temp = expand_binop (mode, and_optab, temp,
				 gen_int_mode (d - 1, mode),
				 NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  else
	    temp = expand_shift (code: RSHIFT_EXPR, mode, shifted: temp,
				 amount: ushift, NULL_RTX, unsignedp: 1);
	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			       0, OPTAB_LIB_WIDEN);
	  return expand_shift (code: RSHIFT_EXPR, mode, shifted: temp, amount: logd, NULL_RTX, unsignedp: 0);
	}
    }

  /* Fallback: branch around the bias addition.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (target: temp, inc: gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (code: RSHIFT_EXPR, mode, shifted: temp, amount: logd, NULL_RTX, unsignedp: 0);
}
4184 | |
4185 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
4186 | if that is convenient, and returning where the result is. |
4187 | You may request either the quotient or the remainder as the result; |
4188 | specify REM_FLAG nonzero to get the remainder. |
4189 | |
4190 | CODE is the expression code for which kind of division this is; |
4191 | it controls how rounding is done. MODE is the machine mode to use. |
4192 | UNSIGNEDP nonzero means do unsigned division. */ |
4193 | |
4194 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI |
4195 | and then correct it by or'ing in missing high bits |
4196 | if result of ANDI is nonzero. |
4197 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. |
4198 | This could optimize to a bfexts instruction. |
4199 | But C doesn't use these operations, so their optimizations are |
4200 | left for later. */ |
4201 | /* ??? For modulo, we don't actually need the highpart of the first product, |
4202 | the low part will do nicely. And for small divisors, the second multiply |
4203 | can also be a low-part only multiply or even be completely left out. |
4204 | E.g. to calculate the remainder of a division by 3 with a 32 bit |
4205 | multiply, multiply with 0x55555556 and extract the upper two bits; |
4206 | the result is exact for inputs up to 0x1fffffff. |
4207 | The input range can be reduced by using cross-sum rules. |
4208 | For odd divisors >= 3, the following table gives right shift counts |
4209 | so that if a number is shifted by an integer multiple of the given |
4210 | amount, the remainder stays the same: |
4211 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, |
4212 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, |
4213 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, |
4214 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, |
4215 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 |
4216 | |
4217 | Cross-sum rules for even numbers can be derived by leaving as many bits |
4218 | to the right alone as the divisor has zeros to the right. |
4219 | E.g. if x is an unsigned 32 bit number: |
4220 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 |
4221 | */ |
4222 | |
4223 | rtx |
4224 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
4225 | rtx op0, rtx op1, rtx target, int unsignedp, |
4226 | enum optab_methods methods) |
4227 | { |
4228 | machine_mode compute_mode; |
4229 | rtx tquotient; |
4230 | rtx quotient = 0, remainder = 0; |
4231 | rtx_insn *last; |
4232 | rtx_insn *insn; |
4233 | optab optab1, optab2; |
4234 | int op1_is_constant, op1_is_pow2 = 0; |
4235 | int max_cost, ; |
4236 | static HOST_WIDE_INT last_div_const = 0; |
4237 | bool speed = optimize_insn_for_speed_p (); |
4238 | |
4239 | op1_is_constant = CONST_INT_P (op1); |
4240 | if (op1_is_constant) |
4241 | { |
4242 | wide_int ext_op1 = rtx_mode_t (op1, mode); |
4243 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 |
4244 | || (! unsignedp |
4245 | && wi::popcount (wi::neg (x: ext_op1)) == 1)); |
4246 | } |
4247 | |
4248 | /* |
4249 | This is the structure of expand_divmod: |
4250 | |
4251 | First comes code to fix up the operands so we can perform the operations |
4252 | correctly and efficiently. |
4253 | |
4254 | Second comes a switch statement with code specific for each rounding mode. |
4255 | For some special operands this code emits all RTL for the desired |
4256 | operation, for other cases, it generates only a quotient and stores it in |
4257 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4258 | to indicate that it has not done anything. |
4259 | |
4260 | Last comes code that finishes the operation. If QUOTIENT is set and |
4261 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If |
4262 | QUOTIENT is not set, it is computed using trunc rounding. |
4263 | |
4264 | We try to generate special code for division and remainder when OP1 is a |
4265 | constant. If |OP1| = 2**n we can use shifts and some other fast |
4266 | operations. For other values of OP1, we compute a carefully selected |
4267 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 |
4268 | by m. |
4269 | |
4270 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper |
4271 | half of the product. Different strategies for generating the product are |
4272 | implemented in expmed_mult_highpart. |
4273 | |
4274 | If what we actually want is the remainder, we generate that by another |
4275 | by-constant multiplication and a subtraction. */ |
4276 | |
4277 | /* We shouldn't be called with OP1 == const1_rtx, but some of the |
4278 | code below will malfunction if we are, so check here and handle |
4279 | the special case if so. */ |
4280 | if (op1 == const1_rtx) |
4281 | return rem_flag ? const0_rtx : op0; |
4282 | |
4283 | /* When dividing by -1, we could get an overflow. |
4284 | negv_optab can handle overflows. */ |
4285 | if (! unsignedp && op1 == constm1_rtx) |
4286 | { |
4287 | if (rem_flag) |
4288 | return const0_rtx; |
4289 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
4290 | ? negv_optab : neg_optab, op0, target, 0); |
4291 | } |
4292 | |
4293 | if (target |
4294 | /* Don't use the function value register as a target |
4295 | since we have to read it as well as write it, |
4296 | and function-inlining gets confused by this. */ |
4297 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) |
4298 | /* Don't clobber an operand while doing a multi-step calculation. */ |
4299 | || ((rem_flag || op1_is_constant) |
4300 | && (reg_mentioned_p (target, op0) |
4301 | || (MEM_P (op0) && MEM_P (target)))) |
4302 | || reg_mentioned_p (target, op1) |
4303 | || (MEM_P (op1) && MEM_P (target)))) |
4304 | target = 0; |
4305 | |
4306 | /* Get the mode in which to perform this computation. Normally it will |
4307 | be MODE, but sometimes we can't do the desired operation in MODE. |
4308 | If so, pick a wider mode in which we can do the operation. Convert |
4309 | to that mode at the start to avoid repeated conversions. |
4310 | |
4311 | First see what operations we need. These depend on the expression |
4312 | we are evaluating. (We assume that divxx3 insns exist under the |
4313 | same conditions that modxx3 insns and that these insns don't normally |
4314 | fail. If these assumptions are not correct, we may generate less |
4315 | efficient code in some cases.) |
4316 | |
4317 | Then see if we find a mode in which we can open-code that operation |
4318 | (either a division, modulus, or shift). Finally, check for the smallest |
4319 | mode for which we can do the operation with a library call. */ |
4320 | |
4321 | /* We might want to refine this now that we have division-by-constant |
4322 | optimization. Since expmed_mult_highpart tries so many variants, it is |
4323 | not straightforward to generalize this. Maybe we should make an array |
4324 | of possible modes in init_expmed? Save this for GCC 2.7. */ |
4325 | |
4326 | optab1 = (op1_is_pow2 |
4327 | ? (unsignedp ? lshr_optab : ashr_optab) |
4328 | : (unsignedp ? udiv_optab : sdiv_optab)); |
4329 | optab2 = (op1_is_pow2 ? optab1 |
4330 | : (unsignedp ? udivmod_optab : sdivmod_optab)); |
4331 | |
4332 | if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN) |
4333 | { |
4334 | FOR_EACH_MODE_FROM (compute_mode, mode) |
4335 | if (optab_handler (op: optab1, mode: compute_mode) != CODE_FOR_nothing |
4336 | || optab_handler (op: optab2, mode: compute_mode) != CODE_FOR_nothing) |
4337 | break; |
4338 | |
4339 | if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN) |
4340 | FOR_EACH_MODE_FROM (compute_mode, mode) |
4341 | if (optab_libfunc (optab1, compute_mode) |
4342 | || optab_libfunc (optab2, compute_mode)) |
4343 | break; |
4344 | } |
4345 | else |
4346 | compute_mode = mode; |
4347 | |
4348 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4349 | probably die. */ |
4350 | if (compute_mode == VOIDmode) |
4351 | compute_mode = mode; |
4352 | |
4353 | if (target && GET_MODE (target) == compute_mode) |
4354 | tquotient = target; |
4355 | else |
4356 | tquotient = gen_reg_rtx (compute_mode); |
4357 | |
4358 | #if 0 |
4359 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE |
4360 | (mode), and thereby get better code when OP1 is a constant. Do that |
4361 | later. It will require going over all usages of SIZE below. */ |
4362 | size = GET_MODE_BITSIZE (mode); |
4363 | #endif |
4364 | |
4365 | /* Only deduct something for a REM if the last divide done was |
4366 | for a different constant. Then set the constant of the last |
4367 | divide. */ |
4368 | max_cost = (unsignedp |
4369 | ? udiv_cost (speed, mode: compute_mode) |
4370 | : sdiv_cost (speed, mode: compute_mode)); |
4371 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4372 | && INTVAL (op1) == last_div_const)) |
4373 | max_cost -= (mul_cost (speed, mode: compute_mode) |
4374 | + add_cost (speed, mode: compute_mode)); |
4375 | |
4376 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; |
4377 | |
4378 | /* Now convert to the best mode to use. */ |
4379 | if (compute_mode != mode) |
4380 | { |
4381 | op0 = convert_modes (mode: compute_mode, oldmode: mode, x: op0, unsignedp); |
4382 | op1 = convert_modes (mode: compute_mode, oldmode: mode, x: op1, unsignedp); |
4383 | |
4384 | /* convert_modes may have placed op1 into a register, so we |
4385 | must recompute the following. */ |
4386 | op1_is_constant = CONST_INT_P (op1); |
4387 | if (op1_is_constant) |
4388 | { |
4389 | wide_int ext_op1 = rtx_mode_t (op1, compute_mode); |
4390 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 |
4391 | || (! unsignedp |
4392 | && wi::popcount (wi::neg (x: ext_op1)) == 1)); |
4393 | } |
4394 | else |
4395 | op1_is_pow2 = 0; |
4396 | } |
4397 | |
4398 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
4399 | |
4400 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
4401 | op0 = force_reg (compute_mode, op0); |
4402 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
4403 | op1 = force_reg (compute_mode, op1); |
4404 | |
4405 | /* If we need the remainder or if OP1 is constant, we need to |
4406 | put OP0 in a register in case it has any queued subexpressions. */ |
4407 | if (rem_flag || op1_is_constant) |
4408 | op0 = force_reg (compute_mode, op0); |
4409 | |
4410 | last = get_last_insn (); |
4411 | |
4412 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
4413 | if (unsignedp) |
4414 | { |
4415 | if (code == FLOOR_DIV_EXPR) |
4416 | code = TRUNC_DIV_EXPR; |
4417 | if (code == FLOOR_MOD_EXPR) |
4418 | code = TRUNC_MOD_EXPR; |
4419 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4420 | code = TRUNC_DIV_EXPR; |
4421 | } |
4422 | |
4423 | if (op1 != const0_rtx) |
4424 | switch (code) |
4425 | { |
4426 | case TRUNC_MOD_EXPR: |
4427 | case TRUNC_DIV_EXPR: |
4428 | if (op1_is_constant) |
4429 | { |
4430 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
4431 | int size = GET_MODE_BITSIZE (mode: int_mode); |
4432 | if (unsignedp) |
4433 | { |
4434 | unsigned HOST_WIDE_INT mh, ml; |
4435 | int pre_shift, post_shift; |
4436 | int dummy; |
4437 | wide_int wd = rtx_mode_t (op1, int_mode); |
4438 | unsigned HOST_WIDE_INT d = wd.to_uhwi (); |
4439 | |
4440 | if (wi::popcount (wd) == 1) |
4441 | { |
4442 | pre_shift = floor_log2 (x: d); |
4443 | if (rem_flag) |
4444 | { |
4445 | unsigned HOST_WIDE_INT mask |
4446 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
4447 | remainder |
4448 | = expand_binop (int_mode, and_optab, op0, |
4449 | gen_int_mode (mask, int_mode), |
4450 | remainder, 1, methods); |
4451 | if (remainder) |
4452 | return gen_lowpart (mode, remainder); |
4453 | } |
4454 | quotient = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4455 | amount: pre_shift, target: tquotient, unsignedp: 1); |
4456 | } |
4457 | else if (size <= HOST_BITS_PER_WIDE_INT) |
4458 | { |
4459 | if (d >= (HOST_WIDE_INT_1U << (size - 1))) |
4460 | { |
4461 | /* Most significant bit of divisor is set; emit an scc |
4462 | insn. */ |
4463 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4464 | int_mode, 1, 1); |
4465 | } |
4466 | else |
4467 | { |
4468 | /* Find a suitable multiplier and right shift count |
4469 | instead of multiplying with D. */ |
4470 | |
4471 | mh = choose_multiplier (d, n: size, precision: size, |
4472 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &dummy); |
4473 | |
4474 | /* If the suggested multiplier is more than SIZE bits, |
4475 | we can do better for even divisors, using an |
4476 | initial right shift. */ |
4477 | if (mh != 0 && (d & 1) == 0) |
4478 | { |
4479 | pre_shift = ctz_or_zero (x: d); |
4480 | mh = choose_multiplier (d: d >> pre_shift, n: size, |
4481 | precision: size - pre_shift, |
4482 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &dummy); |
4483 | gcc_assert (!mh); |
4484 | } |
4485 | else |
4486 | pre_shift = 0; |
4487 | |
4488 | if (mh != 0) |
4489 | { |
4490 | rtx t1, t2, t3, t4; |
4491 | |
4492 | if (post_shift - 1 >= BITS_PER_WORD) |
4493 | goto fail1; |
4494 | |
4495 | extra_cost |
4496 | = (shift_cost (speed, mode: int_mode, bits: post_shift - 1) |
4497 | + shift_cost (speed, mode: int_mode, bits: 1) |
4498 | + 2 * add_cost (speed, mode: int_mode)); |
4499 | t1 = expmed_mult_highpart |
4500 | (mode: int_mode, op0, op1: gen_int_mode (ml, int_mode), |
4501 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4502 | if (t1 == 0) |
4503 | goto fail1; |
4504 | t2 = force_operand (gen_rtx_MINUS (int_mode, |
4505 | op0, t1), |
4506 | NULL_RTX); |
4507 | t3 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, |
4508 | shifted: t2, amount: 1, NULL_RTX, unsignedp: 1); |
4509 | t4 = force_operand (gen_rtx_PLUS (int_mode, |
4510 | t1, t3), |
4511 | NULL_RTX); |
4512 | quotient = expand_shift |
4513 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t4, |
4514 | amount: post_shift - 1, target: tquotient, unsignedp: 1); |
4515 | } |
4516 | else |
4517 | { |
4518 | rtx t1, t2; |
4519 | |
4520 | if (pre_shift >= BITS_PER_WORD |
4521 | || post_shift >= BITS_PER_WORD) |
4522 | goto fail1; |
4523 | |
4524 | t1 = expand_shift |
4525 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4526 | amount: pre_shift, NULL_RTX, unsignedp: 1); |
4527 | extra_cost |
4528 | = (shift_cost (speed, mode: int_mode, bits: pre_shift) |
4529 | + shift_cost (speed, mode: int_mode, bits: post_shift)); |
4530 | t2 = expmed_mult_highpart |
4531 | (mode: int_mode, op0: t1, |
4532 | op1: gen_int_mode (ml, int_mode), |
4533 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4534 | if (t2 == 0) |
4535 | goto fail1; |
4536 | quotient = expand_shift |
4537 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4538 | amount: post_shift, target: tquotient, unsignedp: 1); |
4539 | } |
4540 | } |
4541 | } |
4542 | else /* Too wide mode to use tricky code */ |
4543 | break; |
4544 | |
4545 | insn = get_last_insn (); |
4546 | if (insn != last) |
4547 | set_dst_reg_note (insn, REG_EQUAL, |
4548 | gen_rtx_UDIV (int_mode, op0, op1), |
4549 | quotient); |
4550 | } |
4551 | else /* TRUNC_DIV, signed */ |
4552 | { |
4553 | unsigned HOST_WIDE_INT ml; |
4554 | int lgup, post_shift; |
4555 | rtx mlr; |
4556 | HOST_WIDE_INT d = INTVAL (op1); |
4557 | unsigned HOST_WIDE_INT abs_d; |
4558 | |
4559 | /* Not prepared to handle division/remainder by |
4560 | 0xffffffffffffffff8000000000000000 etc. */ |
4561 | if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) |
4562 | break; |
4563 | |
4564 | /* Since d might be INT_MIN, we have to cast to |
4565 | unsigned HOST_WIDE_INT before negating to avoid |
4566 | undefined signed overflow. */ |
4567 | abs_d = (d >= 0 |
4568 | ? (unsigned HOST_WIDE_INT) d |
4569 | : - (unsigned HOST_WIDE_INT) d); |
4570 | |
4571 | /* n rem d = n rem -d */ |
4572 | if (rem_flag && d < 0) |
4573 | { |
4574 | d = abs_d; |
4575 | op1 = gen_int_mode (abs_d, int_mode); |
4576 | } |
4577 | |
4578 | if (d == 1) |
4579 | quotient = op0; |
4580 | else if (d == -1) |
4581 | quotient = expand_unop (int_mode, neg_optab, op0, |
4582 | tquotient, 0); |
4583 | else if (size <= HOST_BITS_PER_WIDE_INT |
4584 | && abs_d == HOST_WIDE_INT_1U << (size - 1)) |
4585 | { |
4586 | /* This case is not handled correctly below. */ |
4587 | quotient = emit_store_flag (tquotient, EQ, op0, op1, |
4588 | int_mode, 1, 1); |
4589 | if (quotient == 0) |
4590 | goto fail1; |
4591 | } |
4592 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
4593 | && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) |
4594 | && (rem_flag |
4595 | ? smod_pow2_cheap (speed, mode: int_mode) |
4596 | : sdiv_pow2_cheap (speed, mode: int_mode)) |
4597 | /* We assume that cheap metric is true if the |
4598 | optab has an expander for this mode. */ |
4599 | && ((optab_handler (op: (rem_flag ? smod_optab |
4600 | : sdiv_optab), |
4601 | mode: int_mode) |
4602 | != CODE_FOR_nothing) |
4603 | || (optab_handler (op: sdivmod_optab, mode: int_mode) |
4604 | != CODE_FOR_nothing))) |
4605 | ; |
4606 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) |
4607 | { |
4608 | if (rem_flag) |
4609 | { |
4610 | remainder = expand_smod_pow2 (mode: int_mode, op0, d); |
4611 | if (remainder) |
4612 | return gen_lowpart (mode, remainder); |
4613 | } |
4614 | |
4615 | if (sdiv_pow2_cheap (speed, mode: int_mode) |
4616 | && ((optab_handler (op: sdiv_optab, mode: int_mode) |
4617 | != CODE_FOR_nothing) |
4618 | || (optab_handler (op: sdivmod_optab, mode: int_mode) |
4619 | != CODE_FOR_nothing))) |
4620 | quotient = expand_divmod (rem_flag: 0, code: TRUNC_DIV_EXPR, |
4621 | mode: int_mode, op0, |
4622 | op1: gen_int_mode (abs_d, |
4623 | int_mode), |
4624 | NULL_RTX, unsignedp: 0); |
4625 | else |
4626 | quotient = expand_sdiv_pow2 (mode: int_mode, op0, d: abs_d); |
4627 | |
4628 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4629 | negate the quotient. */ |
4630 | if (d < 0) |
4631 | { |
4632 | insn = get_last_insn (); |
4633 | if (insn != last |
4634 | && abs_d < (HOST_WIDE_INT_1U |
4635 | << (HOST_BITS_PER_WIDE_INT - 1))) |
4636 | set_dst_reg_note (insn, REG_EQUAL, |
4637 | gen_rtx_DIV (int_mode, op0, |
4638 | gen_int_mode |
4639 | (abs_d, |
4640 | int_mode)), |
4641 | quotient); |
4642 | |
4643 | quotient = expand_unop (int_mode, neg_optab, |
4644 | quotient, quotient, 0); |
4645 | } |
4646 | } |
4647 | else if (size <= HOST_BITS_PER_WIDE_INT) |
4648 | { |
4649 | choose_multiplier (d: abs_d, n: size, precision: size - 1, |
4650 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &lgup); |
4651 | if (ml < HOST_WIDE_INT_1U << (size - 1)) |
4652 | { |
4653 | rtx t1, t2, t3; |
4654 | |
4655 | if (post_shift >= BITS_PER_WORD |
4656 | || size - 1 >= BITS_PER_WORD) |
4657 | goto fail1; |
4658 | |
4659 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4660 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4661 | + add_cost (speed, mode: int_mode)); |
4662 | t1 = expmed_mult_highpart |
4663 | (mode: int_mode, op0, op1: gen_int_mode (ml, int_mode), |
4664 | NULL_RTX, unsignedp: 0, max_cost: max_cost - extra_cost); |
4665 | if (t1 == 0) |
4666 | goto fail1; |
4667 | t2 = expand_shift |
4668 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t1, |
4669 | amount: post_shift, NULL_RTX, unsignedp: 0); |
4670 | t3 = expand_shift |
4671 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4672 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4673 | if (d < 0) |
4674 | quotient |
4675 | = force_operand (gen_rtx_MINUS (int_mode, t3, t2), |
4676 | tquotient); |
4677 | else |
4678 | quotient |
4679 | = force_operand (gen_rtx_MINUS (int_mode, t2, t3), |
4680 | tquotient); |
4681 | } |
4682 | else |
4683 | { |
4684 | rtx t1, t2, t3, t4; |
4685 | |
4686 | if (post_shift >= BITS_PER_WORD |
4687 | || size - 1 >= BITS_PER_WORD) |
4688 | goto fail1; |
4689 | |
4690 | ml |= HOST_WIDE_INT_M1U << (size - 1); |
4691 | mlr = gen_int_mode (ml, int_mode); |
4692 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4693 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4694 | + 2 * add_cost (speed, mode: int_mode)); |
4695 | t1 = expmed_mult_highpart (mode: int_mode, op0, op1: mlr, |
4696 | NULL_RTX, unsignedp: 0, |
4697 | max_cost: max_cost - extra_cost); |
4698 | if (t1 == 0) |
4699 | goto fail1; |
4700 | t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0), |
4701 | NULL_RTX); |
4702 | t3 = expand_shift |
4703 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4704 | amount: post_shift, NULL_RTX, unsignedp: 0); |
4705 | t4 = expand_shift |
4706 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4707 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4708 | if (d < 0) |
4709 | quotient |
4710 | = force_operand (gen_rtx_MINUS (int_mode, t4, t3), |
4711 | tquotient); |
4712 | else |
4713 | quotient |
4714 | = force_operand (gen_rtx_MINUS (int_mode, t3, t4), |
4715 | tquotient); |
4716 | } |
4717 | } |
4718 | else /* Too wide mode to use tricky code */ |
4719 | break; |
4720 | |
4721 | insn = get_last_insn (); |
4722 | if (insn != last) |
4723 | set_dst_reg_note (insn, REG_EQUAL, |
4724 | gen_rtx_DIV (int_mode, op0, op1), |
4725 | quotient); |
4726 | } |
4727 | break; |
4728 | } |
4729 | fail1: |
4730 | delete_insns_since (last); |
4731 | break; |
4732 | |
4733 | case FLOOR_DIV_EXPR: |
4734 | case FLOOR_MOD_EXPR: |
4735 | /* We will come here only for signed operations. */ |
4736 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (mode: compute_mode)) |
4737 | { |
4738 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
4739 | int size = GET_MODE_BITSIZE (mode: int_mode); |
4740 | unsigned HOST_WIDE_INT mh, ml; |
4741 | int pre_shift, lgup, post_shift; |
4742 | HOST_WIDE_INT d = INTVAL (op1); |
4743 | |
4744 | if (d > 0) |
4745 | { |
4746 | /* We could just as easily deal with negative constants here, |
4747 | but it does not seem worth the trouble for GCC 2.6. */ |
4748 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) |
4749 | { |
4750 | pre_shift = floor_log2 (x: d); |
4751 | if (rem_flag) |
4752 | { |
4753 | unsigned HOST_WIDE_INT mask |
4754 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
4755 | remainder = expand_binop |
4756 | (int_mode, and_optab, op0, |
4757 | gen_int_mode (mask, int_mode), |
4758 | remainder, 0, methods); |
4759 | if (remainder) |
4760 | return gen_lowpart (mode, remainder); |
4761 | } |
4762 | quotient = expand_shift |
4763 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4764 | amount: pre_shift, target: tquotient, unsignedp: 0); |
4765 | } |
4766 | else |
4767 | { |
4768 | rtx t1, t2, t3, t4; |
4769 | |
4770 | mh = choose_multiplier (d, n: size, precision: size - 1, |
4771 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &lgup); |
4772 | gcc_assert (!mh); |
4773 | |
4774 | if (post_shift < BITS_PER_WORD |
4775 | && size - 1 < BITS_PER_WORD) |
4776 | { |
4777 | t1 = expand_shift |
4778 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4779 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4780 | t2 = expand_binop (int_mode, xor_optab, op0, t1, |
4781 | NULL_RTX, 0, OPTAB_WIDEN); |
4782 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4783 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4784 | + 2 * add_cost (speed, mode: int_mode)); |
4785 | t3 = expmed_mult_highpart |
4786 | (mode: int_mode, op0: t2, op1: gen_int_mode (ml, int_mode), |
4787 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4788 | if (t3 != 0) |
4789 | { |
4790 | t4 = expand_shift |
4791 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t3, |
4792 | amount: post_shift, NULL_RTX, unsignedp: 1); |
4793 | quotient = expand_binop (int_mode, xor_optab, |
4794 | t4, t1, tquotient, 0, |
4795 | OPTAB_WIDEN); |
4796 | } |
4797 | } |
4798 | } |
4799 | } |
4800 | else |
4801 | { |
4802 | rtx nsign, t1, t2, t3, t4; |
4803 | t1 = force_operand (gen_rtx_PLUS (int_mode, |
4804 | op0, constm1_rtx), NULL_RTX); |
4805 | t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX, |
4806 | 0, OPTAB_WIDEN); |
4807 | nsign = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4808 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4809 | t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), |
4810 | NULL_RTX); |
4811 | t4 = expand_divmod (rem_flag: 0, code: TRUNC_DIV_EXPR, mode: int_mode, op0: t3, op1, |
4812 | NULL_RTX, unsignedp: 0); |
4813 | if (t4) |
4814 | { |
4815 | rtx t5; |
4816 | t5 = expand_unop (int_mode, one_cmpl_optab, nsign, |
4817 | NULL_RTX, 0); |
4818 | quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5), |
4819 | tquotient); |
4820 | } |
4821 | } |
4822 | } |
4823 | |
4824 | if (quotient != 0) |
4825 | break; |
4826 | delete_insns_since (last); |
4827 | |
4828 | /* Try using an instruction that produces both the quotient and |
4829 | remainder, using truncation. We can easily compensate the quotient |
4830 | or remainder to get floor rounding, once we have the remainder. |
4831 | Notice that we compute also the final remainder value here, |
4832 | and return the result right away. */ |
4833 | if (target == 0 || GET_MODE (target) != compute_mode) |
4834 | target = gen_reg_rtx (compute_mode); |
4835 | |
4836 | if (rem_flag) |
4837 | { |
4838 | remainder |
4839 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
4840 | quotient = gen_reg_rtx (compute_mode); |
4841 | } |
4842 | else |
4843 | { |
4844 | quotient |
4845 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
4846 | remainder = gen_reg_rtx (compute_mode); |
4847 | } |
4848 | |
4849 | if (expand_twoval_binop (sdivmod_optab, op0, op1, |
4850 | quotient, remainder, 0)) |
4851 | { |
4852 | /* This could be computed with a branch-less sequence. |
4853 | Save that for later. */ |
4854 | rtx tem; |
4855 | rtx_code_label *label = gen_label_rtx (); |
4856 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
4857 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4858 | NULL_RTX, 0, OPTAB_WIDEN); |
4859 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
4860 | expand_dec (target: quotient, const1_rtx); |
4861 | expand_inc (target: remainder, inc: op1); |
4862 | emit_label (label); |
4863 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
4864 | } |
4865 | |
4866 | /* No luck with division elimination or divmod. Have to do it |
4867 | by conditionally adjusting op0 *and* the result. */ |
4868 | { |
4869 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
4870 | rtx adjusted_op0; |
4871 | rtx tem; |
4872 | |
4873 | quotient = gen_reg_rtx (compute_mode); |
4874 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
4875 | label1 = gen_label_rtx (); |
4876 | label2 = gen_label_rtx (); |
4877 | label3 = gen_label_rtx (); |
4878 | label4 = gen_label_rtx (); |
4879 | label5 = gen_label_rtx (); |
4880 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4881 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); |
4882 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4883 | quotient, 0, methods); |
4884 | if (tem != quotient) |
4885 | emit_move_insn (quotient, tem); |
4886 | emit_jump_insn (targetm.gen_jump (label5)); |
4887 | emit_barrier (); |
4888 | emit_label (label1); |
4889 | expand_inc (target: adjusted_op0, const1_rtx); |
4890 | emit_jump_insn (targetm.gen_jump (label4)); |
4891 | emit_barrier (); |
4892 | emit_label (label2); |
4893 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
4894 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4895 | quotient, 0, methods); |
4896 | if (tem != quotient) |
4897 | emit_move_insn (quotient, tem); |
4898 | emit_jump_insn (targetm.gen_jump (label5)); |
4899 | emit_barrier (); |
4900 | emit_label (label3); |
4901 | expand_dec (target: adjusted_op0, const1_rtx); |
4902 | emit_label (label4); |
4903 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4904 | quotient, 0, methods); |
4905 | if (tem != quotient) |
4906 | emit_move_insn (quotient, tem); |
4907 | expand_dec (target: quotient, const1_rtx); |
4908 | emit_label (label5); |
4909 | } |
4910 | break; |
4911 | |
4912 | case CEIL_DIV_EXPR: |
4913 | case CEIL_MOD_EXPR: |
4914 | if (unsignedp) |
4915 | { |
4916 | if (op1_is_constant |
4917 | && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4918 | && (HWI_COMPUTABLE_MODE_P (mode: compute_mode) |
4919 | || INTVAL (op1) >= 0)) |
4920 | { |
4921 | scalar_int_mode int_mode |
4922 | = as_a <scalar_int_mode> (m: compute_mode); |
4923 | rtx t1, t2, t3; |
4924 | unsigned HOST_WIDE_INT d = INTVAL (op1); |
4925 | t1 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4926 | amount: floor_log2 (x: d), target: tquotient, unsignedp: 1); |
4927 | t2 = expand_binop (int_mode, and_optab, op0, |
4928 | gen_int_mode (d - 1, int_mode), |
4929 | NULL_RTX, 1, methods); |
4930 | t3 = gen_reg_rtx (int_mode); |
4931 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1); |
4932 | if (t3 == 0) |
4933 | { |
4934 | rtx_code_label *lab; |
4935 | lab = gen_label_rtx (); |
4936 | do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab); |
4937 | expand_inc (target: t1, const1_rtx); |
4938 | emit_label (lab); |
4939 | quotient = t1; |
4940 | } |
4941 | else |
4942 | quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3), |
4943 | tquotient); |
4944 | break; |
4945 | } |
4946 | |
4947 | /* Try using an instruction that produces both the quotient and |
4948 | remainder, using truncation. We can easily compensate the |
4949 | quotient or remainder to get ceiling rounding, once we have the |
4950 | remainder. Notice that we compute also the final remainder |
4951 | value here, and return the result right away. */ |
4952 | if (target == 0 || GET_MODE (target) != compute_mode) |
4953 | target = gen_reg_rtx (compute_mode); |
4954 | |
4955 | if (rem_flag) |
4956 | { |
4957 | remainder = (REG_P (target) |
4958 | ? target : gen_reg_rtx (compute_mode)); |
4959 | quotient = gen_reg_rtx (compute_mode); |
4960 | } |
4961 | else |
4962 | { |
4963 | quotient = (REG_P (target) |
4964 | ? target : gen_reg_rtx (compute_mode)); |
4965 | remainder = gen_reg_rtx (compute_mode); |
4966 | } |
4967 | |
4968 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, |
4969 | remainder, 1)) |
4970 | { |
4971 | /* This could be computed with a branch-less sequence. |
4972 | Save that for later. */ |
4973 | rtx_code_label *label = gen_label_rtx (); |
4974 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4975 | compute_mode, label); |
4976 | expand_inc (target: quotient, const1_rtx); |
4977 | expand_dec (target: remainder, dec: op1); |
4978 | emit_label (label); |
4979 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
4980 | } |
4981 | |
4982 | /* No luck with division elimination or divmod. Have to do it |
4983 | by conditionally adjusting op0 *and* the result. */ |
4984 | { |
4985 | rtx_code_label *label1, *label2; |
4986 | rtx adjusted_op0, tem; |
4987 | |
4988 | quotient = gen_reg_rtx (compute_mode); |
4989 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
4990 | label1 = gen_label_rtx (); |
4991 | label2 = gen_label_rtx (); |
4992 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4993 | compute_mode, label1); |
4994 | emit_move_insn (quotient, const0_rtx); |
4995 | emit_jump_insn (targetm.gen_jump (label2)); |
4996 | emit_barrier (); |
4997 | emit_label (label1); |
4998 | expand_dec (target: adjusted_op0, const1_rtx); |
4999 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, |
5000 | quotient, 1, methods); |
5001 | if (tem != quotient) |
5002 | emit_move_insn (quotient, tem); |
5003 | expand_inc (target: quotient, const1_rtx); |
5004 | emit_label (label2); |
5005 | } |
5006 | } |
5007 | else /* signed */ |
5008 | { |
5009 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
5010 | && INTVAL (op1) >= 0) |
5011 | { |
5012 | /* This is extremely similar to the code for the unsigned case |
5013 | above. For 2.7 we should merge these variants, but for |
5014 | 2.6.1 I don't want to touch the code for unsigned since that |
5015 | get used in C. The signed case will only be used by other |
5016 | languages (Ada). */ |
5017 | |
5018 | rtx t1, t2, t3; |
5019 | unsigned HOST_WIDE_INT d = INTVAL (op1); |
5020 | t1 = expand_shift (code: RSHIFT_EXPR, mode: compute_mode, shifted: op0, |
5021 | amount: floor_log2 (x: d), target: tquotient, unsignedp: 0); |
5022 | t2 = expand_binop (compute_mode, and_optab, op0, |
5023 | gen_int_mode (d - 1, compute_mode), |
5024 | NULL_RTX, 1, methods); |
5025 | t3 = gen_reg_rtx (compute_mode); |
5026 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, |
5027 | compute_mode, 1, 1); |
5028 | if (t3 == 0) |
5029 | { |
5030 | rtx_code_label *lab; |
5031 | lab = gen_label_rtx (); |
5032 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
5033 | expand_inc (target: t1, const1_rtx); |
5034 | emit_label (lab); |
5035 | quotient = t1; |
5036 | } |
5037 | else |
5038 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
5039 | t1, t3), |
5040 | tquotient); |
5041 | break; |
5042 | } |
5043 | |
5044 | /* Try using an instruction that produces both the quotient and |
5045 | remainder, using truncation. We can easily compensate the |
5046 | quotient or remainder to get ceiling rounding, once we have the |
5047 | remainder. Notice that we compute also the final remainder |
5048 | value here, and return the result right away. */ |
5049 | if (target == 0 || GET_MODE (target) != compute_mode) |
5050 | target = gen_reg_rtx (compute_mode); |
5051 | if (rem_flag) |
5052 | { |
5053 | remainder= (REG_P (target) |
5054 | ? target : gen_reg_rtx (compute_mode)); |
5055 | quotient = gen_reg_rtx (compute_mode); |
5056 | } |
5057 | else |
5058 | { |
5059 | quotient = (REG_P (target) |
5060 | ? target : gen_reg_rtx (compute_mode)); |
5061 | remainder = gen_reg_rtx (compute_mode); |
5062 | } |
5063 | |
5064 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, |
5065 | remainder, 0)) |
5066 | { |
5067 | /* This could be computed with a branch-less sequence. |
5068 | Save that for later. */ |
5069 | rtx tem; |
5070 | rtx_code_label *label = gen_label_rtx (); |
5071 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
5072 | compute_mode, label); |
5073 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
5074 | NULL_RTX, 0, OPTAB_WIDEN); |
5075 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
5076 | expand_inc (target: quotient, const1_rtx); |
5077 | expand_dec (target: remainder, dec: op1); |
5078 | emit_label (label); |
5079 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5080 | } |
5081 | |
5082 | /* No luck with division elimination or divmod. Have to do it |
5083 | by conditionally adjusting op0 *and* the result. */ |
5084 | { |
5085 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
5086 | rtx adjusted_op0; |
5087 | rtx tem; |
5088 | |
5089 | quotient = gen_reg_rtx (compute_mode); |
5090 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
5091 | label1 = gen_label_rtx (); |
5092 | label2 = gen_label_rtx (); |
5093 | label3 = gen_label_rtx (); |
5094 | label4 = gen_label_rtx (); |
5095 | label5 = gen_label_rtx (); |
5096 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
5097 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, |
5098 | compute_mode, label1); |
5099 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5100 | quotient, 0, methods); |
5101 | if (tem != quotient) |
5102 | emit_move_insn (quotient, tem); |
5103 | emit_jump_insn (targetm.gen_jump (label5)); |
5104 | emit_barrier (); |
5105 | emit_label (label1); |
5106 | expand_dec (target: adjusted_op0, const1_rtx); |
5107 | emit_jump_insn (targetm.gen_jump (label4)); |
5108 | emit_barrier (); |
5109 | emit_label (label2); |
5110 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
5111 | compute_mode, label3); |
5112 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5113 | quotient, 0, methods); |
5114 | if (tem != quotient) |
5115 | emit_move_insn (quotient, tem); |
5116 | emit_jump_insn (targetm.gen_jump (label5)); |
5117 | emit_barrier (); |
5118 | emit_label (label3); |
5119 | expand_inc (target: adjusted_op0, const1_rtx); |
5120 | emit_label (label4); |
5121 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5122 | quotient, 0, methods); |
5123 | if (tem != quotient) |
5124 | emit_move_insn (quotient, tem); |
5125 | expand_inc (target: quotient, const1_rtx); |
5126 | emit_label (label5); |
5127 | } |
5128 | } |
5129 | break; |
5130 | |
5131 | case EXACT_DIV_EXPR: |
5132 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (mode: compute_mode)) |
5133 | { |
5134 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5135 | int size = GET_MODE_BITSIZE (mode: int_mode); |
5136 | HOST_WIDE_INT d = INTVAL (op1); |
5137 | unsigned HOST_WIDE_INT ml; |
5138 | int pre_shift; |
5139 | rtx t1; |
5140 | |
5141 | pre_shift = ctz_or_zero (x: d); |
5142 | ml = invert_mod2n (x: d >> pre_shift, n: size); |
5143 | t1 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
5144 | amount: pre_shift, NULL_RTX, unsignedp); |
5145 | quotient = expand_mult (mode: int_mode, op0: t1, op1: gen_int_mode (ml, int_mode), |
5146 | NULL_RTX, unsignedp: 1); |
5147 | |
5148 | insn = get_last_insn (); |
5149 | set_dst_reg_note (insn, REG_EQUAL, |
5150 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, |
5151 | int_mode, op0, op1), |
5152 | quotient); |
5153 | } |
5154 | break; |
5155 | |
5156 | case ROUND_DIV_EXPR: |
5157 | case ROUND_MOD_EXPR: |
5158 | if (unsignedp) |
5159 | { |
5160 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5161 | rtx tem; |
5162 | rtx_code_label *label; |
5163 | label = gen_label_rtx (); |
5164 | quotient = gen_reg_rtx (int_mode); |
5165 | remainder = gen_reg_rtx (int_mode); |
5166 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) |
5167 | { |
5168 | rtx tem; |
5169 | quotient = expand_binop (int_mode, udiv_optab, op0, op1, |
5170 | quotient, 1, methods); |
5171 | tem = expand_mult (mode: int_mode, op0: quotient, op1, NULL_RTX, unsignedp: 1); |
5172 | remainder = expand_binop (int_mode, sub_optab, op0, tem, |
5173 | remainder, 1, methods); |
5174 | } |
5175 | tem = plus_constant (int_mode, op1, -1); |
5176 | tem = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: tem, amount: 1, NULL_RTX, unsignedp: 1); |
5177 | do_cmp_and_jump (remainder, tem, LEU, int_mode, label); |
5178 | expand_inc (target: quotient, const1_rtx); |
5179 | expand_dec (target: remainder, dec: op1); |
5180 | emit_label (label); |
5181 | } |
5182 | else |
5183 | { |
5184 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5185 | int size = GET_MODE_BITSIZE (mode: int_mode); |
5186 | rtx abs_rem, abs_op1, tem, mask; |
5187 | rtx_code_label *label; |
5188 | label = gen_label_rtx (); |
5189 | quotient = gen_reg_rtx (int_mode); |
5190 | remainder = gen_reg_rtx (int_mode); |
5191 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) |
5192 | { |
5193 | rtx tem; |
5194 | quotient = expand_binop (int_mode, sdiv_optab, op0, op1, |
5195 | quotient, 0, methods); |
5196 | tem = expand_mult (mode: int_mode, op0: quotient, op1, NULL_RTX, unsignedp: 0); |
5197 | remainder = expand_binop (int_mode, sub_optab, op0, tem, |
5198 | remainder, 0, methods); |
5199 | } |
5200 | abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0); |
5201 | abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0); |
5202 | tem = expand_shift (code: LSHIFT_EXPR, mode: int_mode, shifted: abs_rem, |
5203 | amount: 1, NULL_RTX, unsignedp: 1); |
5204 | do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label); |
5205 | tem = expand_binop (int_mode, xor_optab, op0, op1, |
5206 | NULL_RTX, 0, OPTAB_WIDEN); |
5207 | mask = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: tem, |
5208 | amount: size - 1, NULL_RTX, unsignedp: 0); |
5209 | tem = expand_binop (int_mode, xor_optab, mask, const1_rtx, |
5210 | NULL_RTX, 0, OPTAB_WIDEN); |
5211 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
5212 | NULL_RTX, 0, OPTAB_WIDEN); |
5213 | expand_inc (target: quotient, inc: tem); |
5214 | tem = expand_binop (int_mode, xor_optab, mask, op1, |
5215 | NULL_RTX, 0, OPTAB_WIDEN); |
5216 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
5217 | NULL_RTX, 0, OPTAB_WIDEN); |
5218 | expand_dec (target: remainder, dec: tem); |
5219 | emit_label (label); |
5220 | } |
5221 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5222 | |
5223 | default: |
5224 | gcc_unreachable (); |
5225 | } |
5226 | |
5227 | if (quotient == 0) |
5228 | { |
5229 | if (target && GET_MODE (target) != compute_mode) |
5230 | target = 0; |
5231 | |
5232 | if (rem_flag) |
5233 | { |
5234 | /* Try to produce the remainder without producing the quotient. |
5235 | If we seem to have a divmod pattern that does not require widening, |
5236 | don't try widening here. We should really have a WIDEN argument |
5237 | to expand_twoval_binop, since what we'd really like to do here is |
5238 | 1) try a mod insn in compute_mode |
5239 | 2) try a divmod insn in compute_mode |
5240 | 3) try a div insn in compute_mode and multiply-subtract to get |
5241 | remainder |
5242 | 4) try the same things with widening allowed. */ |
5243 | remainder |
5244 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, |
5245 | op0, op1, target, |
5246 | unsignedp, |
5247 | ((optab_handler (op: optab2, mode: compute_mode) |
5248 | != CODE_FOR_nothing) |
5249 | ? OPTAB_DIRECT : OPTAB_WIDEN)); |
5250 | if (remainder == 0) |
5251 | { |
5252 | /* No luck there. Can we do remainder and divide at once |
5253 | without a library call? */ |
5254 | remainder = gen_reg_rtx (compute_mode); |
5255 | if (! expand_twoval_binop ((unsignedp |
5256 | ? udivmod_optab |
5257 | : sdivmod_optab), |
5258 | op0, op1, |
5259 | NULL_RTX, remainder, unsignedp)) |
5260 | remainder = 0; |
5261 | } |
5262 | |
5263 | if (remainder) |
5264 | return gen_lowpart (mode, remainder); |
5265 | } |
5266 | |
5267 | /* Produce the quotient. Try a quotient insn, but not a library call. |
5268 | If we have a divmod in this mode, use it in preference to widening |
5269 | the div (for this test we assume it will not fail). Note that optab2 |
5270 | is set to the one of the two optabs that the call below will use. */ |
5271 | quotient |
5272 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, |
5273 | op0, op1, rem_flag ? NULL_RTX : target, |
5274 | unsignedp, |
5275 | ((optab_handler (op: optab2, mode: compute_mode) |
5276 | != CODE_FOR_nothing) |
5277 | ? OPTAB_DIRECT : OPTAB_WIDEN)); |
5278 | |
5279 | if (quotient == 0) |
5280 | { |
5281 | /* No luck there. Try a quotient-and-remainder insn, |
5282 | keeping the quotient alone. */ |
5283 | quotient = gen_reg_rtx (compute_mode); |
5284 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
5285 | op0, op1, |
5286 | quotient, NULL_RTX, unsignedp)) |
5287 | { |
5288 | quotient = 0; |
5289 | if (! rem_flag) |
5290 | /* Still no luck. If we are not computing the remainder, |
5291 | use a library call for the quotient. */ |
5292 | quotient = sign_expand_binop (compute_mode, |
5293 | udiv_optab, sdiv_optab, |
5294 | op0, op1, target, |
5295 | unsignedp, methods); |
5296 | } |
5297 | } |
5298 | } |
5299 | |
5300 | if (rem_flag) |
5301 | { |
5302 | if (target && GET_MODE (target) != compute_mode) |
5303 | target = 0; |
5304 | |
5305 | if (quotient == 0) |
5306 | { |
5307 | /* No divide instruction either. Use library for remainder. */ |
5308 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, |
5309 | op0, op1, target, |
5310 | unsignedp, methods); |
5311 | /* No remainder function. Try a quotient-and-remainder |
5312 | function, keeping the remainder. */ |
5313 | if (!remainder |
5314 | && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN)) |
5315 | { |
5316 | remainder = gen_reg_rtx (compute_mode); |
5317 | if (!expand_twoval_binop_libfunc |
5318 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5319 | op0, op1, |
5320 | NULL_RTX, remainder, |
5321 | unsignedp ? UMOD : MOD)) |
5322 | remainder = NULL_RTX; |
5323 | } |
5324 | } |
5325 | else |
5326 | { |
5327 | /* We divided. Now finish doing X - Y * (X / Y). */ |
5328 | remainder = expand_mult (mode: compute_mode, op0: quotient, op1, |
5329 | NULL_RTX, unsignedp); |
5330 | remainder = expand_binop (compute_mode, sub_optab, op0, |
5331 | remainder, target, unsignedp, |
5332 | methods); |
5333 | } |
5334 | } |
5335 | |
5336 | if (methods != OPTAB_LIB_WIDEN |
5337 | && (rem_flag ? remainder : quotient) == NULL_RTX) |
5338 | return NULL_RTX; |
5339 | |
5340 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5341 | } |
5342 | |
5343 | /* Return a tree node with data type TYPE, describing the value of X. |
5344 | Usually this is an VAR_DECL, if there is no obvious better choice. |
5345 | X may be an expression, however we only support those expressions |
5346 | generated by loop.c. */ |
5347 | |
5348 | tree |
5349 | make_tree (tree type, rtx x) |
5350 | { |
5351 | tree t; |
5352 | |
5353 | switch (GET_CODE (x)) |
5354 | { |
5355 | case CONST_INT: |
5356 | case CONST_WIDE_INT: |
5357 | t = wide_int_to_tree (type, cst: rtx_mode_t (x, TYPE_MODE (type))); |
5358 | return t; |
5359 | |
5360 | case CONST_DOUBLE: |
5361 | STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); |
5362 | if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) |
5363 | t = wide_int_to_tree (type, |
5364 | cst: wide_int::from_array (val: &CONST_DOUBLE_LOW (x), len: 2, |
5365 | HOST_BITS_PER_WIDE_INT * 2)); |
5366 | else |
5367 | t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); |
5368 | |
5369 | return t; |
5370 | |
5371 | case CONST_VECTOR: |
5372 | { |
5373 | unsigned int npatterns = CONST_VECTOR_NPATTERNS (x); |
5374 | unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); |
5375 | tree itype = TREE_TYPE (type); |
5376 | |
5377 | /* Build a tree with vector elements. */ |
5378 | tree_vector_builder elts (type, npatterns, nelts_per_pattern); |
5379 | unsigned int count = elts.encoded_nelts (); |
5380 | for (unsigned int i = 0; i < count; ++i) |
5381 | { |
5382 | rtx elt = CONST_VECTOR_ELT (x, i); |
5383 | elts.quick_push (obj: make_tree (type: itype, x: elt)); |
5384 | } |
5385 | |
5386 | return elts.build (); |
5387 | } |
5388 | |
5389 | case PLUS: |
5390 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5391 | make_tree (type, XEXP (x, 1))); |
5392 | |
5393 | case MINUS: |
5394 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5395 | make_tree (type, XEXP (x, 1))); |
5396 | |
5397 | case NEG: |
5398 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
5399 | |
5400 | case MULT: |
5401 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5402 | make_tree (type, XEXP (x, 1))); |
5403 | |
5404 | case ASHIFT: |
5405 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5406 | make_tree (type, XEXP (x, 1))); |
5407 | |
5408 | case LSHIFTRT: |
5409 | t = unsigned_type_for (type); |
5410 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5411 | make_tree (t, XEXP (x, 0)), |
5412 | make_tree (type, XEXP (x, 1)))); |
5413 | |
5414 | case ASHIFTRT: |
5415 | t = signed_type_for (type); |
5416 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5417 | make_tree (t, XEXP (x, 0)), |
5418 | make_tree (type, XEXP (x, 1)))); |
5419 | |
5420 | case DIV: |
5421 | if (TREE_CODE (type) != REAL_TYPE) |
5422 | t = signed_type_for (type); |
5423 | else |
5424 | t = type; |
5425 | |
5426 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5427 | make_tree (t, XEXP (x, 0)), |
5428 | make_tree (t, XEXP (x, 1)))); |
5429 | case UDIV: |
5430 | t = unsigned_type_for (type); |
5431 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5432 | make_tree (t, XEXP (x, 0)), |
5433 | make_tree (t, XEXP (x, 1)))); |
5434 | |
5435 | case SIGN_EXTEND: |
5436 | case ZERO_EXTEND: |
5437 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5438 | GET_CODE (x) == ZERO_EXTEND); |
5439 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5440 | |
5441 | case CONST: |
5442 | return make_tree (type, XEXP (x, 0)); |
5443 | |
5444 | case SYMBOL_REF: |
5445 | t = SYMBOL_REF_DECL (x); |
5446 | if (t) |
5447 | return fold_convert (type, build_fold_addr_expr (t)); |
5448 | /* fall through. */ |
5449 | |
5450 | default: |
5451 | if (CONST_POLY_INT_P (x)) |
5452 | return wide_int_to_tree (type: t, cst: const_poly_int_value (x)); |
5453 | |
5454 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
5455 | |
5456 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5457 | address mode to pointer mode. */ |
5458 | if (POINTER_TYPE_P (type)) |
5459 | x = convert_memory_address_addr_space |
5460 | (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); |
5461 | |
5462 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5463 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ |
5464 | t->decl_with_rtl.rtl = x; |
5465 | |
5466 | return t; |
5467 | } |
5468 | } |
5469 | |
5470 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET |
5471 | and returning TARGET. |
5472 | |
5473 | If TARGET is 0, a pseudo-register or constant is returned. */ |
5474 | |
5475 | rtx |
5476 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
5477 | { |
5478 | rtx tem = 0; |
5479 | |
5480 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5481 | tem = simplify_binary_operation (code: AND, mode, op0, op1); |
5482 | if (tem == 0) |
5483 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
5484 | |
5485 | if (target == 0) |
5486 | target = tem; |
5487 | else if (tem != target) |
5488 | emit_move_insn (target, tem); |
5489 | return target; |
5490 | } |
5491 | |
/* Helper function for emit_store_flag: use cstore pattern ICODE to
   compute the comparison "X CODE Y" (operands originally in MODE,
   compared in COMPARE_MODE, unsigned comparison if UNSIGNEDP) and
   normalize the result into TARGET_MODE so that a true comparison
   yields NORMALIZEP (1 for a 0/1 result, -1 for 0/-1, 0 meaning any
   convenient form).  Return the result rtx, or NULL_RTX after rolling
   back any partially emitted insns if the pattern cannot be used.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  class expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  /* Mode in which the cstore pattern delivers its raw result.  */
  scalar_int_mode result_mode = targetm.cstore_mode (icode);
  scalar_int_mode int_target_mode;

  /* Remember the insn-stream position so everything emitted below can
     be deleted if expansion fails part-way.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  /* VOIDmode means the caller has no mode preference; use the
     pattern's natural result mode.  */
  if (target_mode == VOIDmode)
    int_target_mode = result_mode;
  else
    int_target_mode = as_a <scalar_int_mode> (m: target_mode);
  if (!target)
    target = gen_reg_rtx (int_target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* When optimizing, let the expander pick the output register so the
     raw flag value stays available for combining.  */
  create_output_operand (op: &ops[0], optimize ? NULL_RTX : target, mode: result_mode);
  create_fixed_operand (op: &ops[1], x: comparison);
  create_fixed_operand (op: &ops[2], x);
  create_fixed_operand (op: &ops[3], x: y);
  if (!maybe_expand_insn (icode, nops: 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     INT_TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_PRECISION (mode: int_target_mode) > GET_MODE_PRECISION (mode: result_mode))
    {
      gcc_assert (GET_MODE_PRECISION (result_mode) != 1
		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);

      bool unsignedp = (STORE_FLAG_VALUE >= 0);
      convert_move (target, subtarget, unsignedp);

      op0 = target;
      result_mode = int_target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* The flag value is sign-bit-only: an arithmetic or logical shift
       of the sign bit down to bit 0 yields -1/0 or 1/0 directly.  */
    op0 = expand_shift (code: RSHIFT_EXPR, mode: result_mode, shifted: op0,
			amount: GET_MODE_BITSIZE (mode: result_mode) - 1, target: subtarget,
			unsignedp: normalizep == 1);
  else
    {
      /* STORE_FLAG_VALUE is odd, so masking the low bit gives 0/1;
	 negate afterwards if the caller asked for 0/-1.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (mode: result_mode, op0, const1_rtx, target: subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (int_target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5595 | |
5596 | |
5597 | /* A subroutine of emit_store_flag only including "tricks" that do not |
5598 | need a recursive call. These are kept separate to avoid infinite |
5599 | loops. */ |
5600 | |
5601 | static rtx |
5602 | emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, |
5603 | machine_mode mode, int unsignedp, int normalizep, |
5604 | machine_mode target_mode) |
5605 | { |
5606 | rtx subtarget; |
5607 | enum insn_code icode; |
5608 | machine_mode compare_mode; |
5609 | enum mode_class mclass; |
5610 | enum rtx_code scode; |
5611 | |
5612 | if (unsignedp) |
5613 | code = unsigned_condition (code); |
5614 | scode = swap_condition (code); |
5615 | |
5616 | /* If one operand is constant, make it the second one. Only do this |
5617 | if the other operand is not constant as well. */ |
5618 | |
5619 | if (swap_commutative_operands_p (op0, op1)) |
5620 | { |
5621 | std::swap (a&: op0, b&: op1); |
5622 | code = swap_condition (code); |
5623 | } |
5624 | |
5625 | if (mode == VOIDmode) |
5626 | mode = GET_MODE (op0); |
5627 | |
5628 | if (CONST_SCALAR_INT_P (op1)) |
5629 | canonicalize_comparison (mode, &code, &op1); |
5630 | |
5631 | /* For some comparisons with 1 and -1, we can convert this to |
5632 | comparisons with zero. This will often produce more opportunities for |
5633 | store-flag insns. */ |
5634 | |
5635 | switch (code) |
5636 | { |
5637 | case LT: |
5638 | if (op1 == const1_rtx) |
5639 | op1 = const0_rtx, code = LE; |
5640 | break; |
5641 | case LE: |
5642 | if (op1 == constm1_rtx) |
5643 | op1 = const0_rtx, code = LT; |
5644 | break; |
5645 | case GE: |
5646 | if (op1 == const1_rtx) |
5647 | op1 = const0_rtx, code = GT; |
5648 | break; |
5649 | case GT: |
5650 | if (op1 == constm1_rtx) |
5651 | op1 = const0_rtx, code = GE; |
5652 | break; |
5653 | case GEU: |
5654 | if (op1 == const1_rtx) |
5655 | op1 = const0_rtx, code = NE; |
5656 | break; |
5657 | case LTU: |
5658 | if (op1 == const1_rtx) |
5659 | op1 = const0_rtx, code = EQ; |
5660 | break; |
5661 | default: |
5662 | break; |
5663 | } |
5664 | |
5665 | /* If this is A < 0 or A >= 0, we can do this by taking the ones |
5666 | complement of A (for GE) and shifting the sign bit to the low bit. */ |
5667 | scalar_int_mode int_mode; |
5668 | if (op1 == const0_rtx && (code == LT || code == GE) |
5669 | && is_int_mode (mode, int_mode: &int_mode) |
5670 | && (normalizep || STORE_FLAG_VALUE == 1 |
5671 | || val_signbit_p (int_mode, STORE_FLAG_VALUE))) |
5672 | { |
5673 | scalar_int_mode int_target_mode; |
5674 | subtarget = target; |
5675 | |
5676 | if (!target) |
5677 | int_target_mode = int_mode; |
5678 | else |
5679 | { |
5680 | /* If the result is to be wider than OP0, it is best to convert it |
5681 | first. If it is to be narrower, it is *incorrect* to convert it |
5682 | first. */ |
5683 | int_target_mode = as_a <scalar_int_mode> (m: target_mode); |
5684 | if (GET_MODE_SIZE (mode: int_target_mode) > GET_MODE_SIZE (mode: int_mode)) |
5685 | { |
5686 | op0 = convert_modes (mode: int_target_mode, oldmode: int_mode, x: op0, unsignedp: 0); |
5687 | int_mode = int_target_mode; |
5688 | } |
5689 | } |
5690 | |
5691 | if (int_target_mode != int_mode) |
5692 | subtarget = 0; |
5693 | |
5694 | if (code == GE) |
5695 | op0 = expand_unop (int_mode, one_cmpl_optab, op0, |
5696 | ((STORE_FLAG_VALUE == 1 || normalizep) |
5697 | ? 0 : subtarget), 0); |
5698 | |
5699 | if (STORE_FLAG_VALUE == 1 || normalizep) |
5700 | /* If we are supposed to produce a 0/1 value, we want to do |
5701 | a logical shift from the sign bit to the low-order bit; for |
5702 | a -1/0 value, we do an arithmetic shift. */ |
5703 | op0 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
5704 | amount: GET_MODE_BITSIZE (mode: int_mode) - 1, |
5705 | target: subtarget, unsignedp: normalizep != -1); |
5706 | |
5707 | if (int_mode != int_target_mode) |
5708 | op0 = convert_modes (mode: int_target_mode, oldmode: int_mode, x: op0, unsignedp: 0); |
5709 | |
5710 | return op0; |
5711 | } |
5712 | |
5713 | /* Next try expanding this via the backend's cstore<mode>4. */ |
5714 | mclass = GET_MODE_CLASS (mode); |
5715 | FOR_EACH_WIDER_MODE_FROM (compare_mode, mode) |
5716 | { |
5717 | machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode; |
5718 | icode = optab_handler (op: cstore_optab, mode: optab_mode); |
5719 | if (icode != CODE_FOR_nothing) |
5720 | { |
5721 | do_pending_stack_adjust (); |
5722 | rtx tem = emit_cstore (target, icode, code, mode, compare_mode, |
5723 | unsignedp, x: op0, y: op1, normalizep, target_mode); |
5724 | if (tem) |
5725 | return tem; |
5726 | |
5727 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
5728 | { |
5729 | tem = emit_cstore (target, icode, code: scode, mode, compare_mode, |
5730 | unsignedp, x: op1, y: op0, normalizep, target_mode); |
5731 | if (tem) |
5732 | return tem; |
5733 | } |
5734 | break; |
5735 | } |
5736 | } |
5737 | |
5738 | /* If we are comparing a double-word integer with zero or -1, we can |
5739 | convert the comparison into one involving a single word. */ |
5740 | if (is_int_mode (mode, int_mode: &int_mode) |
5741 | && GET_MODE_BITSIZE (mode: int_mode) == BITS_PER_WORD * 2 |
5742 | && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0))) |
5743 | { |
5744 | rtx tem; |
5745 | if ((code == EQ || code == NE) |
5746 | && (op1 == const0_rtx || op1 == constm1_rtx)) |
5747 | { |
5748 | rtx op00, op01; |
5749 | |
5750 | /* Do a logical OR or AND of the two words and compare the |
5751 | result. */ |
5752 | op00 = simplify_gen_subreg (outermode: word_mode, op: op0, innermode: int_mode, byte: 0); |
5753 | op01 = simplify_gen_subreg (outermode: word_mode, op: op0, innermode: int_mode, UNITS_PER_WORD); |
5754 | tem = expand_binop (word_mode, |
5755 | op1 == const0_rtx ? ior_optab : and_optab, |
5756 | op00, op01, NULL_RTX, unsignedp, |
5757 | OPTAB_DIRECT); |
5758 | |
5759 | if (tem != 0) |
5760 | tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode, |
5761 | unsignedp, normalizep); |
5762 | } |
5763 | else if ((code == LT || code == GE) && op1 == const0_rtx) |
5764 | { |
5765 | rtx op0h; |
5766 | |
5767 | /* If testing the sign bit, can just test on high word. */ |
5768 | op0h = simplify_gen_subreg (outermode: word_mode, op: op0, innermode: int_mode, |
5769 | byte: subreg_highpart_offset (outermode: word_mode, |
5770 | innermode: int_mode)); |
5771 | tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode, |
5772 | unsignedp, normalizep); |
5773 | } |
5774 | else |
5775 | tem = NULL_RTX; |
5776 | |
5777 | if (tem) |
5778 | { |
5779 | if (target_mode == VOIDmode || GET_MODE (tem) == target_mode) |
5780 | return tem; |
5781 | if (!target) |
5782 | target = gen_reg_rtx (target_mode); |
5783 | |
5784 | convert_move (target, tem, |
5785 | !val_signbit_known_set_p (word_mode, |
5786 | (normalizep ? normalizep |
5787 | : STORE_FLAG_VALUE))); |
5788 | return target; |
5789 | } |
5790 | } |
5791 | |
5792 | return 0; |
5793 | } |
5794 | |
/* Subroutine of emit_store_flag that handles cases in which the operands
   are scalar integers.  SUBTARGET is the target to use for temporary
   operations and TRUEVAL is the value to store when the condition is
   true.  All other arguments are as for emit_store_flag.
   Returns an rtx holding the flag value, or 0 on failure (in which
   case any partially emitted insns have been deleted).  */

rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
		     rtx op1, scalar_int_mode mode, int unsignedp,
		     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  /* Remember where we are so that failed expansion attempts can be
     rolled back with delete_insns_since.  */
  rtx_insn *last = get_last_insn ();

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      /* A == B iff (A ^ B) == 0; fall back to A - B if XOR is missing.  */
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			      OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (op: cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      /* When STORE_FLAG_VALUE and NORMALIZEP are opposite 1/-1 values,
	 the reversed flag can be fixed up by adding NORMALIZEP; otherwise
	 a XOR with TRUEVAL inverts it.  Only worthwhile when the fix-up
	 constant is free (rtx_cost == 0).  */
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (target: subtarget, code: rcode, op0, op1, mode, unsignedp: 0,
				       STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}
      else if (!want_add
	       && rtx_cost (trueval, mode, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (target: subtarget, code: rcode, op0, op1, mode, unsignedp: 0,
				       normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}

      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      /* Also destructive: SUBTARGET must not alias OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = maybe_expand_shift (code: RSHIFT_EXPR, mode, shifted: op0,
				amount: GET_MODE_BITSIZE (mode) - 1,
				target: subtarget, unsignedp: 0);
      if (tem)
	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			    OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (op: abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (op: ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  /* Zero-extension to word_mode leaves a nonnegative value whose
	     zeroness matches OP0's; continue the trick in word_mode.  */
	  tem = convert_modes (mode: word_mode, oldmode: mode, x: op0, unsignedp: 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  /* (-A) | A has the sign bit set iff A != 0.  */
	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The result now lives in the sign bit; shift it down to bit 0 —
     arithmetically for a 0/-1 result, logically for 0/1.  */
  if (tem && normalizep)
    tem = maybe_expand_shift (code: RSHIFT_EXPR, mode, shifted: tem,
			      amount: GET_MODE_BITSIZE (mode) - 1,
			      target: subtarget, unsignedp: normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  /* Move the result into the caller's target, converting modes.  */
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    /* Nothing worked: discard every insn emitted above.  */
    delete_insns_since (last);

  return tem;
}
6004 | |
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the straightforward scc expansion.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      /* A "raw" result is only usable when the target's natural
	 STORE_FLAG_VALUE is 1, -1, or exactly the sign bit.  */
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      /* Reversal is only safe when (signaling) NaNs cannot change the
	 outcome, or need not be honored in this mode.  */
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (target: subtarget, code: rcode, op0, op1, mode, unsignedp: 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, mode, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (target: subtarget, code: rcode, op0, op1, mode, unsignedp: 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0,
				     OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, unsignedp: 0, normalizep,
				    target_mode);
	}

      if (!HAVE_conditional_move)
	return 0;

      /* Do not turn a trapping comparison into a non-trapping one.  */
      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
	  && flag_trapping_math)
	return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (target: subtarget, code: first_code, op0, op1, mode, unsignedp: 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, { .code: code, .op0: op0, .op1: op1, .mode: mode },
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, { .code: code, .op0: op0, .op1: op1, .mode: mode },
				     trueval, tem, GET_MODE (tem), 0);

      /* On failure, roll back any insns emitted along the way.  */
      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, int_mode: &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, mode: int_mode,
				unsignedp, normalizep, trueval);

  return 0;
}
6165 | |
/* Like emit_store_flag, but always succeeds.  If no store-flag expansion
   exists, fall back to an explicit set/compare/jump/set sequence.
   Arguments are as for emit_store_flag; returns the register holding
   the result (TARGET when possible, otherwise a fresh pseudo).  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */
  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (a&: op0, b&: op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0, so only the store of TRUEVAL needs
	 to be conditional: skip it when TARGET is zero.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
			       NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* TARGET is written before the comparison below, so it must not be
     mentioned by either operand; use a fresh pseudo if it is.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  /* Swap the true/false values so that jumping on the reversed
	     condition still yields the requested result.  */
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
			   label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
6252 | |
6253 | /* Helper function for canonicalize_cmp_for_target. Swap between inclusive |
6254 | and exclusive ranges in order to create an equivalent comparison. See |
6255 | canonicalize_cmp_for_target for the possible cases. */ |
6256 | |
6257 | static enum rtx_code |
6258 | equivalent_cmp_code (enum rtx_code code) |
6259 | { |
6260 | switch (code) |
6261 | { |
6262 | case GT: |
6263 | return GE; |
6264 | case GE: |
6265 | return GT; |
6266 | case LT: |
6267 | return LE; |
6268 | case LE: |
6269 | return LT; |
6270 | case GTU: |
6271 | return GEU; |
6272 | case GEU: |
6273 | return GTU; |
6274 | case LTU: |
6275 | return LEU; |
6276 | case LEU: |
6277 | return LTU; |
6278 | |
6279 | default: |
6280 | return code; |
6281 | } |
6282 | } |
6283 | |
6284 | /* Choose the more appropiate immediate in scalar integer comparisons. The |
6285 | purpose of this is to end up with an immediate which can be loaded into a |
6286 | register in fewer moves, if possible. |
6287 | |
6288 | For each integer comparison there exists an equivalent choice: |
6289 | i) a > b or a >= b + 1 |
6290 | ii) a <= b or a < b + 1 |
6291 | iii) a >= b or a > b - 1 |
6292 | iv) a < b or a <= b - 1 |
6293 | |
6294 | MODE is the mode of the first operand. |
6295 | CODE points to the comparison code. |
6296 | IMM points to the rtx containing the immediate. *IMM must satisfy |
6297 | CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P |
6298 | on exit. */ |
6299 | |
6300 | void |
6301 | canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm) |
6302 | { |
6303 | if (!SCALAR_INT_MODE_P (mode)) |
6304 | return; |
6305 | |
6306 | int to_add = 0; |
6307 | enum signop sgn = unsigned_condition_p (code: *code) ? UNSIGNED : SIGNED; |
6308 | |
6309 | /* Extract the immediate value from the rtx. */ |
6310 | wide_int imm_val = rtx_mode_t (*imm, mode); |
6311 | |
6312 | if (*code == GT || *code == GTU || *code == LE || *code == LEU) |
6313 | to_add = 1; |
6314 | else if (*code == GE || *code == GEU || *code == LT || *code == LTU) |
6315 | to_add = -1; |
6316 | else |
6317 | return; |
6318 | |
6319 | /* Check for overflow/underflow in the case of signed values and |
6320 | wrapping around in the case of unsigned values. If any occur |
6321 | cancel the optimization. */ |
6322 | wi::overflow_type overflow = wi::OVF_NONE; |
6323 | wide_int imm_modif; |
6324 | |
6325 | if (to_add == 1) |
6326 | imm_modif = wi::add (x: imm_val, y: 1, sgn, overflow: &overflow); |
6327 | else |
6328 | imm_modif = wi::sub (x: imm_val, y: 1, sgn, overflow: &overflow); |
6329 | |
6330 | if (overflow) |
6331 | return; |
6332 | |
6333 | /* The following creates a pseudo; if we cannot do that, bail out. */ |
6334 | if (!can_create_pseudo_p ()) |
6335 | return; |
6336 | |
6337 | rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
6338 | rtx new_imm = immed_wide_int_const (imm_modif, mode); |
6339 | |
6340 | rtx_insn *old_rtx = gen_move_insn (reg, *imm); |
6341 | rtx_insn *new_rtx = gen_move_insn (reg, new_imm); |
6342 | |
6343 | /* Update the immediate and the code. */ |
6344 | if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true)) |
6345 | { |
6346 | *code = equivalent_cmp_code (code: *code); |
6347 | *imm = new_imm; |
6348 | } |
6349 | } |
6350 | |
6351 | |
6352 | |
6353 | /* Perform possibly multi-word comparison and conditional jump to LABEL |
6354 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
6355 | now a thin wrapper around do_compare_rtx_and_jump. */ |
6356 | |
6357 | static void |
6358 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
6359 | rtx_code_label *label) |
6360 | { |
6361 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
6362 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, |
6363 | NULL, label, profile_probability::uninitialized ()); |
6364 | } |
6365 | |