! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb
! and add the result to a second limb vector.
!
! Copyright (C) 2013-2024 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
!
! The GNU C Library is free software; you can redistribute it and/or
! modify it under the terms of the GNU Lesser General Public
! License as published by the Free Software Foundation; either
! version 2.1 of the License, or (at your option) any later version.
!
! The GNU C Library is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
! Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public
! License along with the GNU C Library; if not, see
! <https://www.gnu.org/licenses/>.
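
! For reference, a C sketch of the operation this routine performs,
! assuming 32-bit limbs and the usual mpn calling convention; the
! typedef and function name below are illustrative, not glibc's:
!
!   typedef unsigned int mp_limb_t;   /* one 32-bit limb */
!
!   mp_limb_t
!   addmul_1_ref (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
!                 long size, mp_limb_t s2_limb)
!   {
!     unsigned long long acc = 0;     /* running 64-bit carry */
!     for (long i = 0; i < size; i++)
!       {
!         /* product + old limb + carry fits in 64 bits:
!            (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1.  */
!         acc += (unsigned long long) s1_ptr[i] * s2_limb + res_ptr[i];
!         res_ptr[i] = (mp_limb_t) acc;   /* store low 32 bits */
!         acc >>= 32;                     /* keep high 32 bits */
!       }
!     return (mp_limb_t) acc;             /* final carry limb */
!   }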

#include <sysdep.h>

! Incoming arguments, as seen after the register-window save.
#define res_ptr		%i0
#define s1_ptr		%i1
#define sz_arg		%i2
#define s2l_arg		%i3
! Zero-extended working copies of the size and the multiplier limb.
#define sz		%o4
#define carry		%o5
#define s2_limb		%g1
! Scratch limbs and 64-bit products for the two-limb unrolled loop.
#define tmp1		%l0
#define tmp2		%l1
#define tmp3		%l2
#define tmp4		%l3
#define tmp64_1		%g3
#define tmp64_2		%o3

ENTRY(__mpn_addmul_1)
	save	%sp, -96, %sp
	! srl by zero zero-extends the 32-bit arguments so the 64-bit
	! mulx and branch-on-register instructions see clean values.
	srl	sz_arg, 0, sz
	srl	s2l_arg, 0, s2_limb
	subcc	sz, 1, sz
	be,pn	%icc, .Lfinal_limb	! size == 1: single-limb case
	 clr	carry			! delay slot: carry = 0

	! Main loop, two limbs per iteration.  Each sum
	! res[i] + carry + s1[i] * s2_limb is at most
	! (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1, so the 64-bit adds below
	! cannot overflow; the next carry is the high 32 bits.
.Lloop:
	lduw	[s1_ptr + 0x00], tmp1
	lduw	[res_ptr + 0x00], tmp3
	lduw	[s1_ptr + 0x04], tmp2
	lduw	[res_ptr + 0x04], tmp4
	mulx	tmp1, s2_limb, tmp64_1	! 32x32 -> 64-bit product
	add	s1_ptr, 8, s1_ptr
	mulx	tmp2, s2_limb, tmp64_2
	sub	sz, 2, sz
	add	res_ptr, 8, res_ptr
	add	tmp3, tmp64_1, tmp64_1	! add the old result limb
	add	carry, tmp64_1, tmp64_1	! and the incoming carry
	stw	tmp64_1, [res_ptr - 0x08]
	srlx	tmp64_1, 32, carry	! carry = high 32 bits
	add	tmp4, tmp64_2, tmp64_2
	add	carry, tmp64_2, tmp64_2
	stw	tmp64_2, [res_ptr - 0x04]
	brgz	sz, .Lloop
	 srlx	tmp64_2, 32, carry	! delay slot: next carry

	! Here sz is 0 (one limb left over, odd size) or -1 (all done).
	brlz,pt	sz, .Lfinish
	 nop

	! Process the last limb (reached directly when size == 1).
.Lfinal_limb:
	lduw	[s1_ptr + 0x00], tmp1
	lduw	[res_ptr + 0x00], tmp3
	mulx	tmp1, s2_limb, tmp64_1
	add	tmp3, tmp64_1, tmp64_1
	add	carry, tmp64_1, tmp64_1
	stw	tmp64_1, [res_ptr + 0x00]
	srlx	tmp64_1, 32, carry

.Lfinish:
	! Return the final carry: restore adds carry + 0 into the
	! caller's %o0 while popping the register window.
	jmpl	%i7 + 0x8, %g0
	 restore carry, 0, %o0
END(__mpn_addmul_1)