1 | ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store |
2 | ! sum in a third limb vector. |
3 | ! |
4 | ! Copyright (C) 1995-2024 Free Software Foundation, Inc. |
5 | ! |
6 | ! This file is part of the GNU MP Library. |
7 | ! |
8 | ! The GNU MP Library is free software; you can redistribute it and/or modify |
9 | ! it under the terms of the GNU Lesser General Public License as published by |
10 | ! the Free Software Foundation; either version 2.1 of the License, or (at your |
11 | ! option) any later version. |
12 | ! |
13 | ! The GNU MP Library is distributed in the hope that it will be useful, but |
14 | ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
15 | ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
16 | ! License for more details. |
17 | ! |
18 | ! You should have received a copy of the GNU Lesser General Public License |
19 | ! along with the GNU MP Library; see the file COPYING.LIB. If not, |
20 | ! see <https://www.gnu.org/licenses/>. |
21 | |
22 | |
23 | ! INPUT PARAMETERS |
24 | #define RES_PTR %o0 |
25 | #define S1_PTR %o1 |
26 | #define S2_PTR %o2 |
27 | #define SIZE %o3 |
28 | |
29 | #include <sysdep.h> |
30 | |
31 | ENTRY(__mpn_add_n) |
32 | xor S2_PTR,RES_PTR,%g1 |
33 | andcc %g1,4,%g0 |
34 | bne LOC(1) ! branch if alignment differs |
35 | nop |
36 | ! ** V1a ** |
37 | LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 |
38 | be LOC(v1) ! if no, branch |
39 | nop |
40 | /* Add least significant limb separately to align RES_PTR and S2_PTR */ |
41 | ld [S1_PTR],%g4 |
42 | add S1_PTR,4,S1_PTR |
43 | ld [S2_PTR],%g2 |
44 | add S2_PTR,4,S2_PTR |
45 | add SIZE,-1,SIZE |
46 | addcc %g4,%g2,%o4 |
47 | st %o4,[RES_PTR] |
48 | add RES_PTR,4,RES_PTR |
49 | LOC(v1): |
50 | addx %g0,%g0,%o4 ! save cy in register |
51 | cmp SIZE,2 ! if SIZE < 2 ... |
52 | bl LOC(end2) ! ... branch to tail code |
53 | subcc %g0,%o4,%g0 ! restore cy |
54 | |
55 | ld [S1_PTR+0],%g4 |
56 | addcc SIZE,-10,SIZE |
57 | ld [S1_PTR+4],%g1 |
58 | ldd [S2_PTR+0],%g2 |
59 | blt LOC(fin1) |
60 | subcc %g0,%o4,%g0 ! restore cy |
61 | /* Add blocks of 8 limbs until less than 8 limbs remain */ |
62 | LOC(loop1): |
63 | addxcc %g4,%g2,%o4 |
64 | ld [S1_PTR+8],%g4 |
65 | addxcc %g1,%g3,%o5 |
66 | ld [S1_PTR+12],%g1 |
67 | ldd [S2_PTR+8],%g2 |
68 | std %o4,[RES_PTR+0] |
69 | addxcc %g4,%g2,%o4 |
70 | ld [S1_PTR+16],%g4 |
71 | addxcc %g1,%g3,%o5 |
72 | ld [S1_PTR+20],%g1 |
73 | ldd [S2_PTR+16],%g2 |
74 | std %o4,[RES_PTR+8] |
75 | addxcc %g4,%g2,%o4 |
76 | ld [S1_PTR+24],%g4 |
77 | addxcc %g1,%g3,%o5 |
78 | ld [S1_PTR+28],%g1 |
79 | ldd [S2_PTR+24],%g2 |
80 | std %o4,[RES_PTR+16] |
81 | addxcc %g4,%g2,%o4 |
82 | ld [S1_PTR+32],%g4 |
83 | addxcc %g1,%g3,%o5 |
84 | ld [S1_PTR+36],%g1 |
85 | ldd [S2_PTR+32],%g2 |
86 | std %o4,[RES_PTR+24] |
87 | addx %g0,%g0,%o4 ! save cy in register |
88 | addcc SIZE,-8,SIZE |
89 | add S1_PTR,32,S1_PTR |
90 | add S2_PTR,32,S2_PTR |
91 | add RES_PTR,32,RES_PTR |
92 | bge LOC(loop1) |
93 | subcc %g0,%o4,%g0 ! restore cy |
94 | |
95 | LOC(fin1): |
96 | addcc SIZE,8-2,SIZE |
97 | blt LOC(end1) |
98 | subcc %g0,%o4,%g0 ! restore cy |
99 | /* Add blocks of 2 limbs until less than 2 limbs remain */ |
100 | LOC(loope1): |
101 | addxcc %g4,%g2,%o4 |
102 | ld [S1_PTR+8],%g4 |
103 | addxcc %g1,%g3,%o5 |
104 | ld [S1_PTR+12],%g1 |
105 | ldd [S2_PTR+8],%g2 |
106 | std %o4,[RES_PTR+0] |
107 | addx %g0,%g0,%o4 ! save cy in register |
108 | addcc SIZE,-2,SIZE |
109 | add S1_PTR,8,S1_PTR |
110 | add S2_PTR,8,S2_PTR |
111 | add RES_PTR,8,RES_PTR |
112 | bge LOC(loope1) |
113 | subcc %g0,%o4,%g0 ! restore cy |
114 | LOC(end1): |
115 | addxcc %g4,%g2,%o4 |
116 | addxcc %g1,%g3,%o5 |
117 | std %o4,[RES_PTR+0] |
118 | addx %g0,%g0,%o4 ! save cy in register |
119 | |
120 | andcc SIZE,1,%g0 |
121 | be LOC(ret1) |
122 | subcc %g0,%o4,%g0 ! restore cy |
123 | /* Add last limb */ |
124 | ld [S1_PTR+8],%g4 |
125 | ld [S2_PTR+8],%g2 |
126 | addxcc %g4,%g2,%o4 |
127 | st %o4,[RES_PTR+8] |
128 | |
129 | LOC(ret1): |
130 | retl |
131 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
132 | |
133 | LOC(1): xor S1_PTR,RES_PTR,%g1 |
134 | andcc %g1,4,%g0 |
135 | bne LOC(2) |
136 | nop |
137 | ! ** V1b ** |
138 | mov S2_PTR,%g1 |
139 | mov S1_PTR,S2_PTR |
140 | b LOC(0) |
141 | mov %g1,S1_PTR |
142 | |
143 | ! ** V2 ** |
144 | /* If we come here, the alignment of S1_PTR and RES_PTR as well as the |
145 | alignment of S2_PTR and RES_PTR differ. Since there are only two ways |
146 | things can be aligned (that we care about) we now know that the alignment |
147 | of S1_PTR and S2_PTR are the same. */ |
148 | |
149 | LOC(2): cmp SIZE,1 |
150 | be LOC(jone) |
151 | nop |
152 | andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0 |
153 | be LOC(v2) ! if no, branch |
154 | nop |
155 | /* Add least significant limb separately to align S1_PTR and S2_PTR */ |
156 | ld [S1_PTR],%g4 |
157 | add S1_PTR,4,S1_PTR |
158 | ld [S2_PTR],%g2 |
159 | add S2_PTR,4,S2_PTR |
160 | add SIZE,-1,SIZE |
161 | addcc %g4,%g2,%o4 |
162 | st %o4,[RES_PTR] |
163 | add RES_PTR,4,RES_PTR |
164 | |
165 | LOC(v2): |
166 | addx %g0,%g0,%o4 ! save cy in register |
167 | addcc SIZE,-8,SIZE |
168 | blt LOC(fin2) |
169 | subcc %g0,%o4,%g0 ! restore cy |
170 | /* Add blocks of 8 limbs until less than 8 limbs remain */ |
171 | LOC(loop2): |
172 | ldd [S1_PTR+0],%g2 |
173 | ldd [S2_PTR+0],%o4 |
174 | addxcc %g2,%o4,%g2 |
175 | st %g2,[RES_PTR+0] |
176 | addxcc %g3,%o5,%g3 |
177 | st %g3,[RES_PTR+4] |
178 | ldd [S1_PTR+8],%g2 |
179 | ldd [S2_PTR+8],%o4 |
180 | addxcc %g2,%o4,%g2 |
181 | st %g2,[RES_PTR+8] |
182 | addxcc %g3,%o5,%g3 |
183 | st %g3,[RES_PTR+12] |
184 | ldd [S1_PTR+16],%g2 |
185 | ldd [S2_PTR+16],%o4 |
186 | addxcc %g2,%o4,%g2 |
187 | st %g2,[RES_PTR+16] |
188 | addxcc %g3,%o5,%g3 |
189 | st %g3,[RES_PTR+20] |
190 | ldd [S1_PTR+24],%g2 |
191 | ldd [S2_PTR+24],%o4 |
192 | addxcc %g2,%o4,%g2 |
193 | st %g2,[RES_PTR+24] |
194 | addxcc %g3,%o5,%g3 |
195 | st %g3,[RES_PTR+28] |
196 | addx %g0,%g0,%o4 ! save cy in register |
197 | addcc SIZE,-8,SIZE |
198 | add S1_PTR,32,S1_PTR |
199 | add S2_PTR,32,S2_PTR |
200 | add RES_PTR,32,RES_PTR |
201 | bge LOC(loop2) |
202 | subcc %g0,%o4,%g0 ! restore cy |
203 | |
204 | LOC(fin2): |
205 | addcc SIZE,8-2,SIZE |
206 | blt LOC(end2) |
207 | subcc %g0,%o4,%g0 ! restore cy |
208 | LOC(loope2): |
209 | ldd [S1_PTR+0],%g2 |
210 | ldd [S2_PTR+0],%o4 |
211 | addxcc %g2,%o4,%g2 |
212 | st %g2,[RES_PTR+0] |
213 | addxcc %g3,%o5,%g3 |
214 | st %g3,[RES_PTR+4] |
215 | addx %g0,%g0,%o4 ! save cy in register |
216 | addcc SIZE,-2,SIZE |
217 | add S1_PTR,8,S1_PTR |
218 | add S2_PTR,8,S2_PTR |
219 | add RES_PTR,8,RES_PTR |
220 | bge LOC(loope2) |
221 | subcc %g0,%o4,%g0 ! restore cy |
222 | LOC(end2): |
223 | andcc SIZE,1,%g0 |
224 | be LOC(ret2) |
225 | subcc %g0,%o4,%g0 ! restore cy |
226 | /* Add last limb */ |
227 | LOC(jone): |
228 | ld [S1_PTR],%g4 |
229 | ld [S2_PTR],%g2 |
230 | addxcc %g4,%g2,%o4 |
231 | st %o4,[RES_PTR] |
232 | |
233 | LOC(ret2): |
234 | retl |
235 | addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
236 | |
237 | END(__mpn_add_n) |
238 | |