! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
!
! Copyright (C) 1995-2024 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

#include <sysdep.h>

! mp_limb_t __mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
!   Computes {RES_PTR, SIZE} = {S1_PTR, SIZE} - {S2_PTR, SIZE} limb-wise
!   with borrow propagation, and returns the borrow-out of the most
!   significant limb in %o0.  SIZE must be > 0.
!
! Carry/borrow handling: the carry condition code cannot be read directly,
! so the code materializes it into a register with "addx %g0,%g0,%o4"
! (%o4 := cy) and later re-creates the carry flag with "subcc %g0,%o4,%g0"
! (0 - %o4 sets carry iff %o4 is 1).  That restore is usually placed in a
! branch delay slot, so it executes whether or not the branch is taken.
!
! "ldd"/"std" transfer an even/odd register pair (%g2/%g3, %o4/%o5) to or
! from an 8-byte-aligned address.  The three variants below (V1a, V1b, V2)
! exist so that doubleword accesses are only used on the two pointers that
! share the same word-within-doubleword alignment.

ENTRY(__mpn_sub_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0		! do RES_PTR and S2_PTR differ in bit 2?
	bne	LOC(1)			! branch if alignment differs
	nop
! **  V1a  **  RES_PTR and S2_PTR share doubleword alignment
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

	ld	[S1_PTR+0],%g4		! pre-load first limb pair from S1
	addcc	SIZE,-10,SIZE		! pre-bias SIZE for the 8-limb loop test
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2		! loads the %g2/%g3 pair
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1):
	addcc	SIZE,8-2,SIZE		! undo bias; test for >= 2 limbs left
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	subxcc	%g4,%g2,%o4		! subtract the pre-loaded limb pair
	subxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0		! do RES_PTR and S1_PTR also differ?
	bne	LOC(2)
	nop
! **  V1b  **  RES_PTR and S1_PTR share doubleword alignment
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1b)		! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
	ld	[S2_PTR],%g4
	add	S2_PTR,4,S2_PTR
	ld	[S1_PTR],%g2
	add	S1_PTR,4,S1_PTR
	add	SIZE,-1,SIZE
	subcc	%g2,%g4,%o4		! note operand order: S1 limb - S2 limb
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1b):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

	ld	[S2_PTR+0],%g4		! pre-load first limb pair from S2
	addcc	SIZE,-10,SIZE		! pre-bias SIZE for the 8-limb loop test
	ld	[S2_PTR+4],%g1
	ldd	[S1_PTR+0],%g2		! loads the %g2/%g3 pair
	blt	LOC(fin1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+20],%g1
	ldd	[S1_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+28],%g1
	ldd	[S1_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+36],%g1
	ldd	[S1_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1b)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1b):
	addcc	SIZE,8-2,SIZE		! undo bias; test for >= 2 limbs left
	blt	LOC(end1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1b)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1b):
	subxcc	%g2,%g4,%o4		! subtract the pre-loaded limb pair
	subxcc	%g3,%g1,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S2_PTR+8],%g4
	ld	[S1_PTR+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1b):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most sign. limb

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):	cmp	SIZE,1
	be	LOC(jone)		! a single limb needs no alignment work
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE		! pre-bias SIZE for the 8-limb loop test
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain.
   Both sources use ldd; RES_PTR may be word-aligned, so store with st.  */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE		! undo bias; test for >= 2 limbs left
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0		! return borrow-out from most sign. limb

END(__mpn_sub_n)
329 | |