1/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2
3 Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 2004, 2005, 2006
5 Free Software Foundation, Inc.
6*/
7
8!! libgcc routines for the Renesas / SuperH SH CPUs.
9!! Contributed by Steve Chamberlain.
10!! sac@cygnus.com
11
12!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
13!! recoded in assembly by Toshiyasu Morita
14!! tm@netcom.com
15
16/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
17 ELF local label prefixes by J"orn Rennecke
18 amylaar@cygnus.com */
19
20/* This code uses shld, and thus is not suitable for SH1 / SH2. */
21
22/* Signed / unsigned division without use of FPU, optimized for SH4.
23 Uses a lookup table for divisors in the range -128 .. +128, and
24 div1 with case distinction for larger divisors in three more ranges.
25 The code is lumped together with the table to allow the use of mova. */
/* Byte offsets, for use with mov.b @(disp,r15), that select one byte of a
   32-bit word in memory independently of endianness:
     L_LSB    - bits 0..7
     L_LSWMSB - bits 8..15
     L_MSWLSB - bits 16..23
   L_LSB is not referenced by the code visible in this file. */
26#ifdef CONFIG_CPU_LITTLE_ENDIAN
27#define L_LSB 0
28#define L_LSWMSB 1
29#define L_MSWLSB 2
30#else
31#define L_LSB 3
32#define L_LSWMSB 2
33#define L_MSWLSB 1
34#endif
35
/*
 * __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
 *   In:  r4 = dividend, r5 = divisor
 *   Out: r0 = quotient
 * r4 and r5 are pushed on the stack and popped again on every exit path;
 * r1 is scratch.  The entry code dispatches on the divisor:
 *   divisor <= 128      -> udiv_le128 (inverse table and dmulu.l)
 *   divisor >= 0x10000  -> udiv_ge64k
 *   otherwise           -> falls through and joins udiv_25
 */
36 .balign 4
37 .global __udivsi3_i4i
38 .global __udivsi3_i4
39 .set __udivsi3_i4, __udivsi3_i4i
40 .type __udivsi3_i4i, @function
41__udivsi3_i4i:
42 mov.w c128_w, r1 /* r1 = 128 */
43 div0u /* set up the div1 state for unsigned division */
44 mov r4,r0
45 shlr8 r0 /* r0 = dividend >> 8 */
46 cmp/hi r1,r5 /* T = (divisor > 128), unsigned */
47 extu.w r5,r1 /* r1 = low 16 bits of the divisor */
48 bf udiv_le128 /* divisor <= 128: table path */
49 cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */
50 bf udiv_ge64k /* divisor >= 0x10000 */
/* 128 < divisor < 0x10000 from here on. */
51 shlr r0 /* r0 = dividend >> 9, T = bit 8 of the dividend */
52 mov r5,r1 /* keep the unmodified divisor for the stack save */
53 shll16 r5 /* divisor moved to the upper 16 bits for div1 */
54 mov.l r4,@-r15 /* save caller r4 */
55 div1 r5,r0
56 mov.l r1,@-r15 /* save caller r5 (copy held in r1) */
57 div1 r5,r0
58 div1 r5,r0
/* Four div1 steps are done once the delay slot below has executed. */
59 bra udiv_25
60 div1 r5,r0 /* delay slot */
61
/*
 * Divisors 0..128: multiply by a tabulated normalized inverse instead of
 * dividing.  div_le128 is reached from pos_result in the signed routine,
 * where r4 and r5 are already on the stack; udiv_le128 is the unsigned
 * entry and pushes them itself.  Both reach div_le128_2 with
 * r1 = div_table_ix[divisor] (sign-extended byte offset).
 */
62div_le128:
63 mova div_table_ix,r0
64 bra div_le128_2
65 mov.b @(r0,r5),r1 /* delay slot: r1 = div_table_ix[divisor] */
66udiv_le128:
67 mov.l r4,@-r15 /* save caller r4 */
68 mova div_table_ix,r0
69 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor] */
70 mov.l r5,@-r15 /* save caller r5 */
71div_le128_2:
72 mova div_table_inv,r0
/* r1 = normalized inverse.  For a zero divisor the offset is -6, which is
   the unaligned access mentioned in the comment above div_table_ix. */
73 mov.l @(r0,r1),r1
74 mov r5,r0
75 tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */
76 mova div_table_clz,r0
77 dmulu.l r1,r4 /* mach:macl = inverse * dividend */
78 mov.b @(r0,r5),r1 /* r1 = shift count for this divisor */
79 bt/s div_by_1
80 mov r4,r0 /* delay slot: r0 = dividend */
81 mov.l @r15+,r5 /* restore caller r5 */
82 sts mach,r0 /* r0 = high word of the product */
/* No clrt is needed: T is 0 here because the bt/s above was not taken. */
83 /* clrt */
84 addc r4,r0 /* add the dividend for the implicit leading 1; carry -> T */
85 mov.l @r15+,r4 /* restore caller r4 */
86 rotcr r0 /* rotate right, bringing the carry into bit 31 */
87 rts
88 shld r1,r0 /* delay slot: shift by the table value (negative = right) */
89
/*
 * Exit for the case where tst #0xfe found the divisor to be 0 or 1.
 * div_by_1_neg returns -r4; div_by_1 returns the value already placed in
 * r0 by the delay slot of the branch that led here.  Both pop the saved
 * r5 and r4.
 */
90div_by_1_neg:
91 neg r4,r0 /* r0 = -r4 */
92div_by_1:
93 mov.l @r15+,r5 /* restore caller r5 */
94 rts
95 mov.l @r15+,r4 /* delay slot: restore caller r4 */
96
/*
 * Divisors that do not fit in 16 bits.  div_ge64k is the entry from
 * pos_result (r4/r5 already saved, T = divisor > (dividend >> 8) set in the
 * delay slot of the branch that led here); udiv_ge64k is the unsigned entry.
 * When the divisor is larger than dividend >> 8 the work is handed to
 * div_r8 / udiv_r8.  Otherwise eight div1 steps are done here and eight
 * more at div_ge64k_end.
 */
97div_ge64k:
98 bt/s div_r8
99 div0u /* delay slot: set up the div1 state for unsigned division */
100 shll8 r5 /* divisor << 8 */
101 bra div_ge64k_2
102 div1 r5,r0 /* delay slot: first div1 step */
103udiv_ge64k:
104 cmp/hi r0,r5 /* T = (divisor > (dividend >> 8)), unsigned */
105 mov r5,r1 /* keep the unmodified divisor for the stack save */
106 bt udiv_r8
107 shll8 r5 /* divisor << 8 */
108 mov.l r4,@-r15 /* save caller r4 */
109 div1 r5,r0 /* first div1 step */
110 mov.l r1,@-r15 /* save caller r5 (copy held in r1) */
111div_ge64k_2:
112 div1 r5,r0
113 mov.l zero_l,r1 /* r1 = 0 */
114 .rept 4
115 div1 r5,r0
116 .endr
117 mov.l r1,@-r15 /* push a zeroed scratch word */
118 div1 r5,r0
119 mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
120 div1 r5,r0 /* eighth div1 step */
121 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* The xor/and/xor below computes r0 = (r0 & 0xffffff00) | (r4 & 0xff). */
122 xor r4,r0
123 and r1,r0
124 bra div_ge64k_end
125 xor r4,r0 /* delay slot */
126
/*
 * Divisor > (dividend >> 8): eight div1 steps on r1, which starts as
 * dividend >> 8.  div_r8 is the entry from div_ge64k (r4/r5 already
 * saved); udiv_r8 is the unsigned entry and saves them itself.  In both
 * cases r4 becomes dividend << 24, so its low eight bits can be fed to
 * div1 through T one at a time by rotcl.
 */
127div_r8:
128 shll16 r4
129 bra div_r8_2
130 shll8 r4 /* delay slot: r4 = dividend << 24 */
131udiv_r8:
132 mov.l r4,@-r15 /* save caller r4 */
133 shll16 r4
134 clrt
135 shll8 r4 /* r4 = dividend << 24 */
136 mov.l r5,@-r15 /* save caller r5 */
137div_r8_2:
138 rotcl r4 /* T = next dividend bit */
139 mov r0,r1 /* r1 = dividend >> 8 */
140 div1 r5,r1
141 mov r4,r0 /* r0 takes over as the bit shifter / result collector */
142 rotcl r0
143 mov r5,r4 /* r4 = copy of the divisor; r5 is restored before the last step */
144 div1 r5,r1
/* Each rotcl shifts the T left by the preceding div1 into r0 and moves the
   next dividend bit into T. */
145 .rept 5
146 rotcl r0; div1 r5,r1
147 .endr
148 rotcl r0
149 mov.l @r15+,r5 /* restore caller r5 */
150 div1 r4,r1 /* eighth div1 step, using the divisor copy in r4 */
151 mov.l @r15+,r4 /* restore caller r4 */
152 rts
153 rotcl r0 /* delay slot: shift the final T into the result */
154
/*
 * __sdivsi3_i4i (aliases __sdivsi3_i4, __sdivsi3): signed 32-bit division.
 *   In:  r4 = dividend, r5 = divisor
 *   Out: r0 = quotient
 * The original r4 and r5 are pushed before any negation and popped on
 * every exit path.  Negative operands are negated; the code then continues
 * at pos_result when both operands had the same sign and at neg_result
 * (which negates the quotient at the end) when the signs differed.
 */
155 .global __sdivsi3_i4i
156 .global __sdivsi3_i4
157 .global __sdivsi3
158 .set __sdivsi3_i4, __sdivsi3_i4i
159 .set __sdivsi3, __sdivsi3_i4i
160 .type __sdivsi3_i4i, @function
161 /* This is link-compatible with a __sdivsi3 call,
162 but we effectively clobber only r1. */
163__sdivsi3_i4i:
164 mov.l r4,@-r15 /* save caller r4 */
165 cmp/pz r5 /* T = (divisor >= 0) */
166 mov.w c128_w, r1 /* r1 = 128 */
167 bt/s pos_divisor
168 cmp/pz r4 /* delay slot: T = (dividend >= 0) */
/* Divisor is negative from here on. */
169 mov.l r5,@-r15 /* save caller r5 */
170 neg r5,r5 /* r5 = -divisor */
171 bt/s neg_result /* dividend >= 0, divisor < 0 */
172 cmp/hi r1,r5 /* delay slot: T = (r5 > 128), unsigned */
173 neg r4,r4 /* both negative: r4 = -dividend */
/* pos_result: same dispatch on the divisor size as in __udivsi3_i4i. */
174pos_result:
175 extu.w r5,r0 /* r0 = low 16 bits of the divisor */
176 bf div_le128 /* divisor <= 128: table path */
177 cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */
178 mov r4,r0
179 shlr8 r0 /* r0 = dividend >> 8 */
180 bf/s div_ge64k /* divisor >= 0x10000 */
181 cmp/hi r0,r5 /* delay slot: T = (divisor > (dividend >> 8)) */
182 div0u /* set up the div1 state for unsigned division */
183 shll16 r5 /* divisor moved to the upper 16 bits for div1 */
184 div1 r5,r0
185 div1 r5,r0
186 div1 r5,r0
/* Shared tail for 16-bit divisors.  __udivsi3_i4i jumps here after four
   div1 steps; the signed path falls in after three. */
187udiv_25:
188 mov.l zero_l,r1 /* r1 = 0 */
189 div1 r5,r0
190 div1 r5,r0
191 mov.l r1,@-r15 /* push a zeroed scratch word */
192 .rept 3
193 div1 r5,r0
194 .endr
195 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
/* xtrct + swap.w keep the upper half of r0 and replace its lower half with
   the low 16 bits of r4. */
196 xtrct r4,r0
197 swap.w r0,r0
198 .rept 8
199 div1 r5,r0
200 .endr
201 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* Common exit: eight more div1 steps, then merge the bytes parked in the
   scratch word with the low byte of r0 and shift in the final T. */
202div_ge64k_end:
203 .rept 8
204 div1 r5,r0
205 .endr
206 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
207 extu.b r0,r0 /* keep only the low byte of r0 */
208 mov.l @r15+,r5 /* restore caller r5 */
209 or r4,r0 /* combine with the bytes read back from the scratch word */
210 mov.l @r15+,r4 /* restore caller r4 */
211 rts
212 rotcl r0 /* delay slot: shift the final T into the result */
213
/*
 * Inverse-table path for divisors 0..128 when the quotient must be
 * negated.  Entered from neg_result with r0 = low 16 bits of the divisor
 * and r4/r5 already saved on the stack.  Same computation as div_le128_2,
 * followed by a final neg.
 */
214div_le128_neg:
215 tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */
216 mova div_table_ix,r0
217 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor] */
218 mova div_table_inv,r0
219 bt/s div_by_1_neg
220 mov.l @(r0,r1),r1 /* delay slot: r1 = normalized inverse */
221 mova div_table_clz,r0
222 dmulu.l r1,r4 /* mach:macl = inverse * dividend */
223 mov.b @(r0,r5),r1 /* r1 = shift count for this divisor */
224 mov.l @r15+,r5 /* restore caller r5 */
225 sts mach,r0 /* r0 = high word of the product */
/* No clrt is needed: T is 0 here because the bt/s above was not taken. */
226 /* clrt */
227 addc r4,r0 /* add the dividend for the implicit leading 1; carry -> T */
228 mov.l @r15+,r4 /* restore caller r4 */
229 rotcr r0 /* rotate right, bringing the carry into bit 31 */
230 shld r1,r0 /* shift by the table value (negative = right) */
231 rts
232 neg r0,r0 /* delay slot: negate the quotient */
233
/*
 * pos_divisor: continuation of __sdivsi3_i4i for a non-negative divisor.
 * On entry T = (dividend >= 0), set in the delay slot of the branch that
 * led here.  A non-negative dividend goes to pos_result; otherwise the
 * dividend is negated and control falls into neg_result.
 */
234pos_divisor:
235 mov.l r5,@-r15 /* save caller r5 */
236 bt/s pos_result
237 cmp/hi r1,r5 /* delay slot: T = (divisor > 128), unsigned */
238 neg r4,r4 /* r4 = -dividend */
/* neg_result: same dispatch as pos_result, but every exit negates the
   quotient. */
239neg_result:
240 extu.w r5,r0 /* r0 = low 16 bits of the divisor */
241 bf div_le128_neg /* divisor <= 128: table path */
242 cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */
243 mov r4,r0
244 shlr8 r0 /* r0 = dividend >> 8 */
245 bf/s div_ge64k_neg /* divisor >= 0x10000 */
246 cmp/hi r0,r5 /* delay slot: T = (divisor > (dividend >> 8)) */
247 div0u /* set up the div1 state for unsigned division */
248 mov.l zero_l,r1 /* r1 = 0 */
249 shll16 r5 /* divisor moved to the upper 16 bits for div1 */
250 div1 r5,r0
251 mov.l r1,@-r15 /* push a zeroed scratch word */
252 .rept 7
253 div1 r5,r0
254 .endr
255 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
/* xtrct + swap.w keep the upper half of r0 and replace its lower half with
   the low 16 bits of r4. */
256 xtrct r4,r0
257 swap.w r0,r0
258 .rept 8
259 div1 r5,r0
260 .endr
261 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* Common exit for negated results: eight more div1 steps, merge the bytes
   from the scratch word into r1, shift in the final T and negate. */
262div_ge64k_neg_end:
263 .rept 8
264 div1 r5,r0
265 .endr
266 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
267 extu.b r0,r1 /* r1 = low byte of r0 */
268 mov.l @r15+,r5 /* restore caller r5 */
269 or r4,r1 /* combine with the bytes read back from the scratch word */
/* div_r8_neg also ends here, with its result bits in r1 and r5 already
   restored. */
270div_r8_neg_end:
271 mov.l @r15+,r4 /* restore caller r4 */
272 rotcl r1 /* shift the final T into the result */
273 rts
274 neg r1,r0 /* delay slot: r0 = -r1 */
275
/*
 * Negated-result counterpart of div_ge64k: divisor does not fit in 16
 * bits.  On entry T = (divisor > (dividend >> 8)); that case is handed to
 * div_r8_neg.  Otherwise eight div1 steps are done here and eight more at
 * div_ge64k_neg_end.
 */
276div_ge64k_neg:
277 bt/s div_r8_neg
278 div0u /* delay slot: set up the div1 state for unsigned division */
279 shll8 r5 /* divisor << 8 */
280 mov.l zero_l,r1 /* r1 = 0 */
281 .rept 6
282 div1 r5,r0
283 .endr
284 mov.l r1,@-r15 /* push a zeroed scratch word */
285 div1 r5,r0
286 mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
287 div1 r5,r0 /* eighth div1 step */
288 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* The xor/and/xor below computes r0 = (r0 & 0xffffff00) | (r4 & 0xff). */
289 xor r4,r0
290 and r1,r0
291 bra div_ge64k_neg_end
292 xor r4,r0 /* delay slot */
293
/* 16-bit literal 128, loaded with mov.w by both entry points to compare
   the divisor against the table limit. */
294c128_w:
295 .word 128
296
/*
 * Negated-result counterpart of div_r8: divisor > (dividend >> 8).
 * Entered from div_ge64k_neg with r0 = dividend >> 8 and r4/r5 saved on
 * the stack.  Here r0 is the register stepped by div1 and r1 is the bit
 * shifter / result collector; the result is finished at div_r8_neg_end.
 */
297div_r8_neg:
298 clrt
299 shll16 r4
300 mov r4,r1
301 shll8 r1 /* r1 = dividend << 24 */
302 mov r5,r4 /* r4 = copy of the divisor; r5 is restored before the last step */
/* Each rotcl moves the next dividend bit into T for div1 and shifts the
   previous T into r1. */
303 .rept 7
304 rotcl r1; div1 r5,r0
305 .endr
306 mov.l @r15+,r5 /* restore caller r5 */
307 rotcl r1
308 bra div_r8_neg_end
309 div1 r4,r0 /* delay slot: eighth div1 step, using the divisor copy in r4 */
310
/* 16-bit literal 0xff00; mov.w sign-extends it to 0xffffff00, the mask used
   to keep the upper 24 bits of r0 in the ge64k paths. */
311m256_w:
312 .word 0xff00
313/* This table has been generated by divtab-sh4.c. */
/* div_table_clz: one signed byte per divisor, read with mov.b @(r0,r5) and
   used as the shift count for shld after the multiply by the inverse
   (negative values shift right).  This table holds the entries for
   divisors 0..127; the entry for divisor 128 is the first byte of
   div_table_ix, which follows directly. */
314 .balign 4
315div_table_clz:
/* divisor 0 */
316 .byte 0
/* divisor 1 */
317 .byte 1
/* divisor 2 */
318 .byte 0
/* divisors 3..4 */
319 .byte -1
320 .byte -1
/* divisors 5..8 */
321 .byte -2
322 .byte -2
323 .byte -2
324 .byte -2
/* divisors 9..16 */
325 .byte -3
326 .byte -3
327 .byte -3
328 .byte -3
329 .byte -3
330 .byte -3
331 .byte -3
332 .byte -3
/* divisors 17..32 */
333 .byte -4
334 .byte -4
335 .byte -4
336 .byte -4
337 .byte -4
338 .byte -4
339 .byte -4
340 .byte -4
341 .byte -4
342 .byte -4
343 .byte -4
344 .byte -4
345 .byte -4
346 .byte -4
347 .byte -4
348 .byte -4
/* divisors 33..64 */
349 .byte -5
350 .byte -5
351 .byte -5
352 .byte -5
353 .byte -5
354 .byte -5
355 .byte -5
356 .byte -5
357 .byte -5
358 .byte -5
359 .byte -5
360 .byte -5
361 .byte -5
362 .byte -5
363 .byte -5
364 .byte -5
365 .byte -5
366 .byte -5
367 .byte -5
368 .byte -5
369 .byte -5
370 .byte -5
371 .byte -5
372 .byte -5
373 .byte -5
374 .byte -5
375 .byte -5
376 .byte -5
377 .byte -5
378 .byte -5
379 .byte -5
380 .byte -5
/* divisors 65..127 */
381 .byte -6
382 .byte -6
383 .byte -6
384 .byte -6
385 .byte -6
386 .byte -6
387 .byte -6
388 .byte -6
389 .byte -6
390 .byte -6
391 .byte -6
392 .byte -6
393 .byte -6
394 .byte -6
395 .byte -6
396 .byte -6
397 .byte -6
398 .byte -6
399 .byte -6
400 .byte -6
401 .byte -6
402 .byte -6
403 .byte -6
404 .byte -6
405 .byte -6
406 .byte -6
407 .byte -6
408 .byte -6
409 .byte -6
410 .byte -6
411 .byte -6
412 .byte -6
413 .byte -6
414 .byte -6
415 .byte -6
416 .byte -6
417 .byte -6
418 .byte -6
419 .byte -6
420 .byte -6
421 .byte -6
422 .byte -6
423 .byte -6
424 .byte -6
425 .byte -6
426 .byte -6
427 .byte -6
428 .byte -6
429 .byte -6
430 .byte -6
431 .byte -6
432 .byte -6
433 .byte -6
434 .byte -6
435 .byte -6
436 .byte -6
437 .byte -6
438 .byte -6
439 .byte -6
440 .byte -6
441 .byte -6
442 .byte -6
443 .byte -6
444/* Lookup table translating positive divisor to index into table of
445 normalized inverse. N.B. the '0' entry is also the last entry of the
446 previous table, and causes an unaligned access for division by zero. */
/* Each entry is a signed byte offset relative to div_table_inv, read with
   mov.b @(r0,r5) for divisors 0..128.  The offsets range from -128 (the
   word at zero_l) to 124 (the last word of the inverse table). */
447div_table_ix:
/* divisor 0: doubles as div_table_clz[128]; not a multiple of 4 */
448 .byte -6
/* divisor 1 */
449 .byte -128
/* divisor 2 */
450 .byte -128
/* divisor 3 */
451 .byte 0
/* divisors 4..7, offsets in steps of 64 */
452 .byte -128
453 .byte -64
454 .byte 0
455 .byte 64
/* divisors 8..15, offsets in steps of 32 */
456 .byte -128
457 .byte -96
458 .byte -64
459 .byte -32
460 .byte 0
461 .byte 32
462 .byte 64
463 .byte 96
/* divisors 16..31, offsets in steps of 16 */
464 .byte -128
465 .byte -112
466 .byte -96
467 .byte -80
468 .byte -64
469 .byte -48
470 .byte -32
471 .byte -16
472 .byte 0
473 .byte 16
474 .byte 32
475 .byte 48
476 .byte 64
477 .byte 80
478 .byte 96
479 .byte 112
/* divisors 32..63, offsets in steps of 8 */
480 .byte -128
481 .byte -120
482 .byte -112
483 .byte -104
484 .byte -96
485 .byte -88
486 .byte -80
487 .byte -72
488 .byte -64
489 .byte -56
490 .byte -48
491 .byte -40
492 .byte -32
493 .byte -24
494 .byte -16
495 .byte -8
496 .byte 0
497 .byte 8
498 .byte 16
499 .byte 24
500 .byte 32
501 .byte 40
502 .byte 48
503 .byte 56
504 .byte 64
505 .byte 72
506 .byte 80
507 .byte 88
508 .byte 96
509 .byte 104
510 .byte 112
511 .byte 120
/* divisors 64..127, offsets in steps of 4 (one inverse-table word each) */
512 .byte -128
513 .byte -124
514 .byte -120
515 .byte -116
516 .byte -112
517 .byte -108
518 .byte -104
519 .byte -100
520 .byte -96
521 .byte -92
522 .byte -88
523 .byte -84
524 .byte -80
525 .byte -76
526 .byte -72
527 .byte -68
528 .byte -64
529 .byte -60
530 .byte -56
531 .byte -52
532 .byte -48
533 .byte -44
534 .byte -40
535 .byte -36
536 .byte -32
537 .byte -28
538 .byte -24
539 .byte -20
540 .byte -16
541 .byte -12
542 .byte -8
543 .byte -4
544 .byte 0
545 .byte 4
546 .byte 8
547 .byte 12
548 .byte 16
549 .byte 20
550 .byte 24
551 .byte 28
552 .byte 32
553 .byte 36
554 .byte 40
555 .byte 44
556 .byte 48
557 .byte 52
558 .byte 56
559 .byte 60
560 .byte 64
561 .byte 68
562 .byte 72
563 .byte 76
564 .byte 80
565 .byte 84
566 .byte 88
567 .byte 92
568 .byte 96
569 .byte 100
570 .byte 104
571 .byte 108
572 .byte 112
573 .byte 116
574 .byte 120
575 .byte 124
/* divisor 128 */
576 .byte -128
577/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
/* 64 words in total.  The label div_table_inv sits in the middle (at the
   entry for 1/96) so that the signed byte offsets from div_table_ix,
   -128..124, reach every word.  The first word, zero_l, is the entry for
   1/64 and is also loaded with mov.l wherever the code needs a zero. */
578 .balign 4
579zero_l:
580 .long 0x0
581 .long 0xF81F81F9
582 .long 0xF07C1F08
583 .long 0xE9131AC0
584 .long 0xE1E1E1E2
585 .long 0xDAE6076C
586 .long 0xD41D41D5
587 .long 0xCD856891
588 .long 0xC71C71C8
589 .long 0xC0E07039
590 .long 0xBACF914D
591 .long 0xB4E81B4F
592 .long 0xAF286BCB
593 .long 0xA98EF607
594 .long 0xA41A41A5
595 .long 0x9EC8E952
596 .long 0x9999999A
597 .long 0x948B0FCE
598 .long 0x8F9C18FA
599 .long 0x8ACB90F7
600 .long 0x86186187
601 .long 0x81818182
602 .long 0x7D05F418
603 .long 0x78A4C818
604 .long 0x745D1746
605 .long 0x702E05C1
606 .long 0x6C16C16D
607 .long 0x68168169
608 .long 0x642C8591
609 .long 0x60581606
610 .long 0x5C9882BA
611 .long 0x58ED2309
/* Offset 0: entry for 1/96. */
612div_table_inv:
613 .long 0x55555556
614 .long 0x51D07EAF
615 .long 0x4E5E0A73
616 .long 0x4AFD6A06
617 .long 0x47AE147B
618 .long 0x446F8657
619 .long 0x41414142
620 .long 0x3E22CBCF
621 .long 0x3B13B13C
622 .long 0x38138139
623 .long 0x3521CFB3
624 .long 0x323E34A3
625 .long 0x2F684BDB
626 .long 0x2C9FB4D9
627 .long 0x29E4129F
628 .long 0x27350B89
629 .long 0x24924925
630 .long 0x21FB7813
631 .long 0x1F7047DD
632 .long 0x1CF06ADB
633 .long 0x1A7B9612
634 .long 0x18118119
635 .long 0x15B1E5F8
636 .long 0x135C8114
637 .long 0x11111112
638 .long 0xECF56BF
639 .long 0xC9714FC
640 .long 0xA6810A7
641 .long 0x8421085
642 .long 0x624DD30
643 .long 0x4104105
644 .long 0x2040811
645 /* maximum error: 0.987342 scaled: 0.921875*/
646

source code of linux/arch/sh/lib/udivsi3_i4i.S