| 1 | /* Generic helpers for evaluating polynomials with various schemes. |
| 2 | |
| 3 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU C Library. |
| 5 | |
| 6 | The GNU C Library is free software; you can redistribute it and/or |
| 7 | modify it under the terms of the GNU Lesser General Public |
| 8 | License as published by the Free Software Foundation; either |
| 9 | version 2.1 of the License, or (at your option) any later version. |
| 10 | |
| 11 | The GNU C Library is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | Lesser General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Lesser General Public |
| 17 | License along with the GNU C Library; if not, see |
| 18 | <https://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | |
| 21 | #ifndef VTYPE |
| 22 | # error Cannot use poly_generic without defining VTYPE |
| 23 | #endif |
| 24 | #ifndef VWRAP |
| 25 | # error Cannot use poly_generic without defining VWRAP |
| 26 | #endif |
| 27 | #ifndef FMA |
| 28 | # error Cannot use poly_generic without defining FMA |
| 29 | #endif |
| 30 | |
| 31 | static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2, |
| 32 | const VTYPE *poly) |
| 33 | { |
| 34 | /* At order 3, Estrin and Pairwise Horner are identical. */ |
| 35 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 36 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
| 37 | return FMA (p23, x2, p01); |
| 38 | } |
| 39 | |
| 40 | static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4, |
| 41 | const VTYPE *poly) |
| 42 | { |
| 43 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
| 44 | return FMA (poly[4], x4, p03); |
| 45 | } |
| 46 | static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4, |
| 47 | const VTYPE *poly) |
| 48 | { |
| 49 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
| 50 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
| 51 | return FMA (p45, x4, p03); |
| 52 | } |
| 53 | static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4, |
| 54 | const VTYPE *poly) |
| 55 | { |
| 56 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
| 57 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
| 58 | VTYPE p46 = FMA (poly[6], x2, p45); |
| 59 | return FMA (p46, x4, p03); |
| 60 | } |
| 61 | static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4, |
| 62 | const VTYPE *poly) |
| 63 | { |
| 64 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
| 65 | VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4); |
| 66 | return FMA (p47, x4, p03); |
| 67 | } |
| 68 | static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 69 | const VTYPE *poly) |
| 70 | { |
| 71 | return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
| 72 | } |
| 73 | static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 74 | const VTYPE *poly) |
| 75 | { |
| 76 | VTYPE p89 = FMA (poly[9], x, poly[8]); |
| 77 | return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
| 78 | } |
| 79 | static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 80 | const VTYPE *poly) |
| 81 | { |
| 82 | VTYPE p89 = FMA (poly[9], x, poly[8]); |
| 83 | VTYPE p8_10 = FMA (poly[10], x2, p89); |
| 84 | return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
| 85 | } |
| 86 | static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 87 | const VTYPE *poly) |
| 88 | { |
| 89 | VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8); |
| 90 | return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
| 91 | } |
| 92 | static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 93 | const VTYPE *poly) |
| 94 | { |
| 95 | return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8, |
| 96 | VWRAP (estrin_7) (x, x2, x4, poly)); |
| 97 | } |
| 98 | static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 99 | const VTYPE *poly) |
| 100 | { |
| 101 | return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8, |
| 102 | VWRAP (estrin_7) (x, x2, x4, poly)); |
| 103 | } |
| 104 | static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 105 | const VTYPE *poly) |
| 106 | { |
| 107 | return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8, |
| 108 | VWRAP (estrin_7) (x, x2, x4, poly)); |
| 109 | } |
| 110 | static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 111 | const VTYPE *poly) |
| 112 | { |
| 113 | return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8, |
| 114 | VWRAP (estrin_7) (x, x2, x4, poly)); |
| 115 | } |
| 116 | static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 117 | VTYPE x16, const VTYPE *poly) |
| 118 | { |
| 119 | return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
| 120 | } |
| 121 | static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 122 | VTYPE x16, const VTYPE *poly) |
| 123 | { |
| 124 | VTYPE p16_17 = FMA (poly[17], x, poly[16]); |
| 125 | return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
| 126 | } |
| 127 | static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 128 | VTYPE x16, const VTYPE *poly) |
| 129 | { |
| 130 | VTYPE p16_17 = FMA (poly[17], x, poly[16]); |
| 131 | VTYPE p16_18 = FMA (poly[18], x2, p16_17); |
| 132 | return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
| 133 | } |
| 134 | static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
| 135 | VTYPE x16, const VTYPE *poly) |
| 136 | { |
| 137 | VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16); |
| 138 | return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
| 139 | } |
| 140 | |
| 141 | static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly) |
| 142 | { |
| 143 | VTYPE p = FMA (poly[3], x, poly[2]); |
| 144 | p = FMA (x, p, poly[1]); |
| 145 | p = FMA (x, p, poly[0]); |
| 146 | return p; |
| 147 | } |
| 148 | static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly) |
| 149 | { |
| 150 | VTYPE p = FMA (poly[4], x, poly[3]); |
| 151 | p = FMA (x, p, poly[2]); |
| 152 | p = FMA (x, p, poly[1]); |
| 153 | p = FMA (x, p, poly[0]); |
| 154 | return p; |
| 155 | } |
| 156 | static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly) |
| 157 | { |
| 158 | return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]); |
| 159 | } |
| 160 | static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly) |
| 161 | { |
| 162 | return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]); |
| 163 | } |
| 164 | static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly) |
| 165 | { |
| 166 | return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]); |
| 167 | } |
| 168 | static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly) |
| 169 | { |
| 170 | return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]); |
| 171 | } |
| 172 | static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly) |
| 173 | { |
| 174 | return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]); |
| 175 | } |
| 176 | static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly) |
| 177 | { |
| 178 | return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]); |
| 179 | } |
| 180 | static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly) |
| 181 | { |
| 182 | return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]); |
| 183 | } |
| 184 | static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly) |
| 185 | { |
| 186 | return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]); |
| 187 | } |
| 188 | |
| 189 | static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 190 | { |
| 191 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 192 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
| 193 | VTYPE p; |
| 194 | p = FMA (x2, poly[4], p23); |
| 195 | p = FMA (x2, p, p01); |
| 196 | return p; |
| 197 | } |
| 198 | static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 199 | { |
| 200 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 201 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
| 202 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
| 203 | VTYPE p; |
| 204 | p = FMA (x2, p45, p23); |
| 205 | p = FMA (x2, p, p01); |
| 206 | return p; |
| 207 | } |
| 208 | static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 209 | { |
| 210 | VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2); |
| 211 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 212 | return FMA (x2, p26, p01); |
| 213 | } |
| 214 | static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 215 | { |
| 216 | VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2); |
| 217 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 218 | return FMA (x2, p27, p01); |
| 219 | } |
| 220 | static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 221 | { |
| 222 | VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2); |
| 223 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 224 | return FMA (x2, p28, p01); |
| 225 | } |
| 226 | static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 227 | { |
| 228 | VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2); |
| 229 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 230 | return FMA (x2, p29, p01); |
| 231 | } |
| 232 | static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 233 | { |
| 234 | VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2); |
| 235 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 236 | return FMA (x2, p2_10, p01); |
| 237 | } |
| 238 | static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 239 | { |
| 240 | VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2); |
| 241 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 242 | return FMA (x2, p2_11, p01); |
| 243 | } |
| 244 | static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 245 | { |
| 246 | VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2); |
| 247 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 248 | return FMA (x2, p2_12, p01); |
| 249 | } |
| 250 | static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 251 | { |
| 252 | VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2); |
| 253 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 254 | return FMA (x2, p2_13, p01); |
| 255 | } |
| 256 | static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 257 | { |
| 258 | VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2); |
| 259 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 260 | return FMA (x2, p2_14, p01); |
| 261 | } |
| 262 | static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 263 | { |
| 264 | VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2); |
| 265 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 266 | return FMA (x2, p2_15, p01); |
| 267 | } |
| 268 | static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 269 | { |
| 270 | VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2); |
| 271 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 272 | return FMA (x2, p2_16, p01); |
| 273 | } |
| 274 | static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 275 | { |
| 276 | VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2); |
| 277 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 278 | return FMA (x2, p2_17, p01); |
| 279 | } |
| 280 | static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly) |
| 281 | { |
| 282 | VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2); |
| 283 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
| 284 | return FMA (x2, p2_18, p01); |
| 285 | } |
| 286 | |