1 | # Alpha EV5 __mpn_rshift -- shift a multi-limb number right by cnt bits. |
2 | |
3 | # Copyright (C) 1994-2024 Free Software Foundation, Inc. |
4 | |
5 | # This file is part of the GNU MP Library. |
6 | |
7 | # The GNU MP Library is free software; you can redistribute it and/or modify |
8 | # it under the terms of the GNU Lesser General Public License as published by |
9 | # the Free Software Foundation; either version 2.1 of the License, or (at your |
10 | # option) any later version. |
11 | |
12 | # The GNU MP Library is distributed in the hope that it will be useful, but |
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
14 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
15 | # License for more details. |
16 | |
17 | # You should have received a copy of the GNU Lesser General Public License |
18 | # along with the GNU MP Library. If not, see <https://www.gnu.org/licenses/>. |
19 | |
20 | |
21 | # INPUT PARAMETERS |
22 | # res_ptr r16 |
23 | # s1_ptr r17 |
24 | # size r18 |
25 | # cnt r19 |
26 | |
27 | # This code runs at 3.25 cycles/limb on the EV5. |
28 | |
# Assembler setup for the routine that follows.
#   noreorder  asks the assembler to keep instructions in the order written.
#              NOTE(review): GNU as documents this option as accepted only
#              for compatibility -- confirm for the assembler actually used.
#   noat       the assembler may not use $28 ($at) as its own temporary;
#              the code below uses $28 explicitly as a loop counter.
29 | .set noreorder |
30 | .set noat |
# Emit into the code section and align the entry point of the routine.
31 | .text |
32 | .align 3 |
# __mpn_rshift: shift a vector of 64-bit limbs right by cnt bits.
#
# Register arguments (see INPUT PARAMETERS in the file header):
#   $16  res_ptr  address the result limbs are stored to
#   $17  s1_ptr   address the source limbs are loaded from
#   $18  size     number of limbs
#   $19  cnt      shift count in bits
# Result:
#   $0   first source limb shifted left by (64 - cnt), i.e. the bits that
#        drop off the low end of the vector.
#
# Result limb i is (s1[i] >> cnt) | (s1[i+1] << (64 - cnt)); the last result
# limb is s1[size-1] >> cnt only.
#
# NOTE(review): assumes size >= 1 (the first limb is loaded unconditionally
# and size-1 drives every loop count) and presumably 1 <= cnt <= 63 -- with
# cnt == 0 the complementary left shift would also be a shift by 0.
# Confirm against the mpn_rshift contract used by callers.
#
# Working registers:
#   $20      0 - cnt; used as the left-shift count (the shift instructions
#            use only the low 6 bits of the count, so this acts as 64 - cnt)
#   $28      iteration counter of .Loop0
#   $1-$4    source limbs loaded from memory
#   $5-$8    result limbs being assembled
#   $21-$24  right-shifted parts waiting to be OR-ed into a result limb
# The stack is not touched; .frame below declares a zero-size frame with the
# return address in $26.
33 | .globl __mpn_rshift |
34 | .ent __mpn_rshift |
35 | __mpn_rshift: |
36 | .frame $30,0,$26,0 |
37 | |
# Prologue: $20 = -cnt, $18 = size - 1 (result limbs that need a following
# source limb), $28 = (size - 1) mod 4, $0 = return value.
38 | ldq $4,0($17) # load first limb |
39 | subq $31,$19,$20 |
40 | subq $18,1,$18 |
41 | and $18,4-1,$28 # number of limbs in first loop |
42 | sll $4,$20,$0 # compute function result |
43 | |
# Skip .Loop0 when size - 1 is already a multiple of 4; otherwise take the
# .Loop0 iterations out of $18 so that what remains is a multiple of 4.
44 | beq $28,.L0 |
45 | subq $18,$28,$18 |
46 | |
# .Loop0: one result limb per pass.
# At the top of each pass: $4 = source limb at 0($17), $28 = passes left.
# $3 receives the next source limb; the result ($4 >> cnt) | ($3 << (64-cnt))
# is stored at -8($16) because $16 has already been advanced.  The
# "or $3,$3,$4" is a register move making the next limb the current one.
47 | .align 3 |
48 | .Loop0: ldq $3,8($17) |
49 | addq $16,8,$16 |
50 | srl $4,$19,$5 |
51 | addq $17,8,$17 |
52 | subq $28,1,$28 |
53 | sll $3,$20,$6 |
54 | or $3,$3,$4 |
55 | or $5,$6,$8 |
56 | stq $8,-8($16) |
57 | bne $28,.Loop0 |
58 | |
# .L0: $24 = right-shifted part of the current limb; it still needs the low
# bits of the next source limb unless none remain ($18 == 0 -> .Lend).
59 | .L0: srl $4,$19,$24 |
60 | beq $18,.Lend |
61 | # warm up phase 1 |
# Load the first group of four source limbs.  From here on $18 counts the
# source limbs not yet loaded; if this group was the last one, go to .Lend1.
62 | ldq $1,8($17) |
63 | subq $18,4,$18 |
64 | ldq $2,16($17) |
65 | ldq $3,24($17) |
66 | ldq $4,32($17) |
67 | beq $18,.Lend1 |
68 | # warm up phase 2 |
# Start shifting the first group while loading the second group (offsets
# 40..64 from the still unadvanced $17).  Each of $1-$4 is reloaded only
# after both of its shifts have been issued.  On exit $7 and $8 are complete
# result limbs; $5 and $6 still need $22 and $23 OR-ed in.
69 | sll $1,$20,$7 |
70 | srl $1,$19,$21 |
71 | sll $2,$20,$8 |
72 | ldq $1,40($17) |
73 | srl $2,$19,$22 |
74 | ldq $2,48($17) |
75 | sll $3,$20,$5 |
76 | or $7,$24,$7 |
77 | srl $3,$19,$23 |
78 | or $8,$21,$8 |
79 | sll $4,$20,$6 |
80 | ldq $3,56($17) |
81 | srl $4,$19,$24 |
82 | ldq $4,64($17) |
83 | subq $18,4,$18 |
84 | beq $18,.Lend2 |
85 | .align 4 |
86 | # main loop |
# Software-pipelined steady state, four limbs per pass.
# At .Loop entry:
#   $7,$8    finished result limbs for 0($16) and 8($16)
#   $5,$6    left-shifted parts for 16($16) and 24($16), awaiting $22,$23
#   $1-$4    the next four source limbs, already loaded
#   $24      right-shifted part carried into the first limb of that group
#   $18      source limbs not yet loaded (non-zero multiple of 4)
# Each pass stores four result limbs, shifts the group held in $1-$4 and
# loads the following group from 72..96($17) before both pointers advance
# by 32 bytes.
# The unop instructions are no-ops; the ldq shown in their trailing comments
# is not assembled.
# NOTE(review): the blank-line grouping in fours presumably mirrors the
# intended EV5 issue groups -- confirm against the 21164 documentation.
87 | .Loop: stq $7,0($16) |
88 | or $5,$22,$5 |
89 | stq $8,8($16) |
90 | or $6,$23,$6 |
91 | |
92 | sll $1,$20,$7 |
93 | subq $18,4,$18 |
94 | srl $1,$19,$21 |
95 | unop # ldq $31,-96($17) |
96 | |
97 | sll $2,$20,$8 |
98 | ldq $1,72($17) |
99 | srl $2,$19,$22 |
100 | ldq $2,80($17) |
101 | |
102 | stq $5,16($16) |
103 | or $7,$24,$7 |
104 | stq $6,24($16) |
105 | or $8,$21,$8 |
106 | |
107 | sll $3,$20,$5 |
108 | unop # ldq $31,-96($17) |
109 | srl $3,$19,$23 |
110 | addq $16,32,$16 |
111 | |
112 | sll $4,$20,$6 |
113 | ldq $3,88($17) |
114 | srl $4,$19,$24 |
115 | ldq $4,96($17) |
116 | |
117 | addq $17,32,$17 |
118 | bne $18,.Loop |
119 | # cool down phase 2/1 |
# .Lend2: no more loads.  Store the group that is in flight (offsets 0..24)
# and shift the last loaded group held in $1-$4.
120 | .Lend2: stq $7,0($16) |
121 | or $5,$22,$5 |
122 | stq $8,8($16) |
123 | or $6,$23,$6 |
124 | sll $1,$20,$7 |
125 | srl $1,$19,$21 |
126 | sll $2,$20,$8 |
127 | srl $2,$19,$22 |
128 | stq $5,16($16) |
129 | or $7,$24,$7 |
130 | stq $6,24($16) |
131 | or $8,$21,$8 |
132 | sll $3,$20,$5 |
133 | srl $3,$19,$23 |
134 | sll $4,$20,$6 |
135 | srl $4,$19,$24 |
136 | # cool down phase 2/2 |
# Store the last group at offsets 32..56; $16 is not advanced here.
137 | stq $7,32($16) |
138 | or $5,$22,$5 |
139 | stq $8,40($16) |
140 | or $6,$23,$6 |
141 | stq $5,48($16) |
142 | stq $6,56($16) |
143 | # cool down phase 2/3 |
# The most significant result limb is only the right-shifted last source limb.
144 | stq $24,64($16) |
# Return through the address in $26; $0 still holds the function result.
145 | ret $31,($26),1 |
146 | |
147 | # cool down phase 1/1 |
# .Lend1: reached when only the group loaded in warm up phase 1 exists.
# Shift and combine those four limbs without any further loads.
148 | .Lend1: sll $1,$20,$7 |
149 | srl $1,$19,$21 |
150 | sll $2,$20,$8 |
151 | srl $2,$19,$22 |
152 | sll $3,$20,$5 |
153 | or $7,$24,$7 |
154 | srl $3,$19,$23 |
155 | or $8,$21,$8 |
156 | sll $4,$20,$6 |
157 | srl $4,$19,$24 |
158 | # cool down phase 1/2 |
# Store the four combined limbs, then the final limb from $24.
159 | stq $7,0($16) |
160 | or $5,$22,$5 |
161 | stq $8,8($16) |
162 | or $6,$23,$6 |
163 | stq $5,16($16) |
164 | stq $6,24($16) |
165 | stq $24,32($16) |
166 | ret $31,($26),1 |
167 | |
# .Lend: no source limbs remain after .Loop0 (or size was 1); the only limb
# left to store is the right-shifted current limb in $24.
168 | .Lend: stq $24,0($16) |
169 | ret $31,($26),1 |
170 | .end __mpn_rshift |
171 | |