| 1 | /* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 |
| 2 | and store difference in a third limb vector. |
| 3 | Copyright (C) 1992-2024 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU MP Library. |
| 5 | |
| 6 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU Lesser General Public License as published by |
| 8 | the Free Software Foundation; either version 2.1 of the License, or (at your |
| 9 | option) any later version. |
| 10 | |
| 11 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 12 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 13 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| 14 | License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Lesser General Public License |
| 17 | along with the GNU MP Library; see the file COPYING.LIB. If not, |
| 18 | see <https://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | #include "sysdep.h" |
| 21 | #include "asm-syntax.h" |
| 22 | |
| 23 | #define PARMS 4+16 /* %esp-relative offset of the first stack argument once the four registers are pushed: 4 (return address) + 16 (space for 4 saved regs) */ |
| 24 | #define RES PARMS /* offset of the destination pointer argument (loaded into %edi) */ |
| 25 | #define S1 RES+4 /* offset of the minuend pointer argument (loaded into %esi) */ |
| 26 | #define S2 S1+4 /* offset of the subtrahend pointer argument (loaded into %ebx) */ |
| 27 | #define SIZE S2+4 /* offset of the limb-count argument (loaded into %ecx) */ |
| 28 | |
| 29 | .text |
| 30 | ENTRY (__mpn_sub_n) |
| 31 | /* __mpn_sub_n: four stack arguments, read below at RES, S1, S2 and SIZE.
   For each of the SIZE 32-bit limbs, lowest address first, stores
   s1[i] - s2[i] - borrow at res[i], carrying the borrow from limb to limb in CF.
   Returns in %eax the borrow out of the last limb (0 or 1).
   SIZE must be > 0 (see the file header); the code decrements it before any check.
   %edi, %esi, %ebp and %ebx are saved and restored; %eax, %ecx, %edx and the
   flags are clobbered.
   Structure: a main loop handling 8 limbs per pass, a one-limb tail loop, and a
   final limb handled on its own.  %ebp always holds the next s2 limb, loaded one
   limb ahead of its use.
   NOTE(review): the instruction order is presumably hand-scheduled for the
   Pentium; between the sbbl instructions only flag-neutral or CF-preserving
   instructions (movl, leal, pushl, popl, decl, incl) are used, so do not
   substitute addl/subl/cmpl there. */ |
| 32 | pushl %edi |
| 33 | cfi_adjust_cfa_offset (4) |
| 34 | pushl %esi |
| 35 | cfi_adjust_cfa_offset (4) |
| 36 | pushl %ebp |
| 37 | cfi_adjust_cfa_offset (4) |
| 38 | pushl %ebx |
| 39 | cfi_adjust_cfa_offset (4) |
| 40 | /* Load the arguments.  The cfi_* macros are not defined in this file (presumably DWARF unwind annotations from the included headers -- confirm); their offsets mirror the push order above: %edi at 12(%esp), %esi at 8, %ebp at 4, %ebx at 0. */ |
| 41 | movl RES(%esp),%edi |
| 42 | cfi_rel_offset (edi, 12) |
| 43 | movl S1(%esp),%esi |
| 44 | cfi_rel_offset (esi, 8) |
| 45 | movl S2(%esp),%ebx |
| 46 | cfi_rel_offset (ebx, 0) |
| 47 | movl SIZE(%esp),%ecx |
| 48 | movl (%ebx),%ebp /* preload s2[0]; from here on %ebp is always one s2 limb ahead */ |
| 49 | cfi_rel_offset (ebp, 4) |
| 50 | /* Split SIZE-1 into %ecx = number of 8-limb passes and %edx = number of single-limb tail steps; the one remaining limb is handled at L(end2). */ |
| 51 | decl %ecx |
| 52 | movl %ecx,%edx |
| 53 | shrl $3,%ecx |
| 54 | andl $7,%edx |
| 55 | testl %ecx,%ecx /* zero carry flag for the first sbbl; also sets ZF when there is no full 8-limb pass */ |
| 56 | jz L(end) |
| 57 | pushl %edx /* the main loop uses %edx as scratch, so park the tail count on the stack */ |
| 58 | cfi_adjust_cfa_offset (4) |
| 59 | /* Main loop: 8 limbs per pass.  ALIGN comes from the included headers (presumably aligns the loop head to 2^3 bytes -- confirm). */ |
| 60 | ALIGN (3) |
| 61 | L(oop): movl 28(%edi),%eax /* fetch destination cache line; the loaded value is overwritten by the next load into %eax */ |
| 62 | leal 32(%edi),%edi /* advance the destination first (leal leaves the flags alone); the stores below therefore use offsets -32 .. -4 */ |
| 63 | /* Limbs 0 and 1 of this pass.  Pattern for each pair: load two s1 limbs, subtract with borrow the s2 limb held in %ebp, reload %ebp with the next s2 limb, then store both results.  The labels L(1)..L(4) are not branch targets within this function. */ |
| 64 | L(1): movl (%esi),%eax |
| 65 | movl 4(%esi),%edx |
| 66 | sbbl %ebp,%eax |
| 67 | movl 4(%ebx),%ebp |
| 68 | sbbl %ebp,%edx |
| 69 | movl 8(%ebx),%ebp |
| 70 | movl %eax,-32(%edi) |
| 71 | movl %edx,-28(%edi) |
| 72 | /* Limbs 2 and 3 of this pass. */ |
| 73 | L(2): movl 8(%esi),%eax |
| 74 | movl 12(%esi),%edx |
| 75 | sbbl %ebp,%eax |
| 76 | movl 12(%ebx),%ebp |
| 77 | sbbl %ebp,%edx |
| 78 | movl 16(%ebx),%ebp |
| 79 | movl %eax,-24(%edi) |
| 80 | movl %edx,-20(%edi) |
| 81 | /* Limbs 4 and 5 of this pass. */ |
| 82 | L(3): movl 16(%esi),%eax |
| 83 | movl 20(%esi),%edx |
| 84 | sbbl %ebp,%eax |
| 85 | movl 20(%ebx),%ebp |
| 86 | sbbl %ebp,%edx |
| 87 | movl 24(%ebx),%ebp |
| 88 | movl %eax,-16(%edi) |
| 89 | movl %edx,-12(%edi) |
| 90 | /* Limbs 6 and 7 of this pass. */ |
| 91 | L(4): movl 24(%esi),%eax |
| 92 | movl 28(%esi),%edx |
| 93 | sbbl %ebp,%eax |
| 94 | movl 28(%ebx),%ebp |
| 95 | sbbl %ebp,%edx |
| 96 | movl 32(%ebx),%ebp /* preload the first s2 limb after this group; in range because at least the final limb always remains after the main loop */ |
| 97 | movl %eax,-8(%edi) |
| 98 | movl %edx,-4(%edi) |
| 99 | /* Advance both source pointers by 8 limbs and count the pass down; leal and decl leave CF untouched, so the borrow carries into the next pass. */ |
| 100 | leal 32(%esi),%esi |
| 101 | leal 32(%ebx),%ebx |
| 102 | decl %ecx |
| 103 | jnz L(oop) |
| 104 | /* Recover the tail count parked before the loop; popl does not modify the flags. */ |
| 105 | popl %edx |
| 106 | cfi_adjust_cfa_offset (-4) |
| 107 | L(end): |
| 108 | decl %edx /* test %edx w/o clobbering carry: a count of 0 becomes -1 and sets SF */ |
| 109 | js L(end2) /* no tail limbs: go straight to the final limb */ |
| 110 | incl %edx /* undo the probe so %edx is the tail count again (CF still preserved) */ |
| 111 | L(oop2): /* tail loop: one limb per pass, %edx passes */ |
| 112 | leal 4(%edi),%edi |
| 113 | movl (%esi),%eax |
| 114 | sbbl %ebp,%eax |
| 115 | movl 4(%ebx),%ebp /* preload the next s2 limb; in range because the final limb is still outstanding */ |
| 116 | movl %eax,-4(%edi) /* %edi was already advanced, hence the -4 */ |
| 117 | leal 4(%esi),%esi |
| 118 | leal 4(%ebx),%ebx |
| 119 | decl %edx |
| 120 | jnz L(oop2) |
| 121 | L(end2): /* final limb: its s2 value is already in %ebp */ |
| 122 | movl (%esi),%eax |
| 123 | sbbl %ebp,%eax |
| 124 | movl %eax,(%edi) |
| 125 | /* Turn the final borrow into the return value: sbbl yields 0 or -1 from CF, negl maps that to 0 or 1. */ |
| 126 | sbbl %eax,%eax |
| 127 | negl %eax |
| 128 | /* Restore the saved registers in reverse push order. */ |
| 129 | popl %ebx |
| 130 | cfi_adjust_cfa_offset (-4) |
| 131 | cfi_restore (ebx) |
| 132 | popl %ebp |
| 133 | cfi_adjust_cfa_offset (-4) |
| 134 | cfi_restore (ebp) |
| 135 | popl %esi |
| 136 | cfi_adjust_cfa_offset (-4) |
| 137 | cfi_restore (esi) |
| 138 | popl %edi |
| 139 | cfi_adjust_cfa_offset (-4) |
| 140 | cfi_restore (edi) |
| 141 | |
| 142 | ret |
| 143 | END (__mpn_sub_n) |
| 144 | |