1 | /* Generic helpers for evaluating polynomials with various schemes. |
2 | |
3 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | |
21 | #ifndef VTYPE |
22 | # error Cannot use poly_generic without defining VTYPE |
23 | #endif |
24 | #ifndef VWRAP |
25 | # error Cannot use poly_generic without defining VWRAP |
26 | #endif |
27 | #ifndef FMA |
28 | # error Cannot use poly_generic without defining FMA |
29 | #endif |
30 | |
31 | static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2, |
32 | const VTYPE *poly) |
33 | { |
34 | /* At order 3, Estrin and Pairwise Horner are identical. */ |
35 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
36 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
37 | return FMA (p23, x2, p01); |
38 | } |
39 | |
40 | static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4, |
41 | const VTYPE *poly) |
42 | { |
43 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
44 | return FMA (poly[4], x4, p03); |
45 | } |
46 | static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4, |
47 | const VTYPE *poly) |
48 | { |
49 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
50 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
51 | return FMA (p45, x4, p03); |
52 | } |
53 | static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4, |
54 | const VTYPE *poly) |
55 | { |
56 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
57 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
58 | VTYPE p46 = FMA (poly[6], x2, p45); |
59 | return FMA (p46, x4, p03); |
60 | } |
61 | static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4, |
62 | const VTYPE *poly) |
63 | { |
64 | VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); |
65 | VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4); |
66 | return FMA (p47, x4, p03); |
67 | } |
68 | static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
69 | const VTYPE *poly) |
70 | { |
71 | return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
72 | } |
73 | static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
74 | const VTYPE *poly) |
75 | { |
76 | VTYPE p89 = FMA (poly[9], x, poly[8]); |
77 | return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
78 | } |
79 | static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
80 | const VTYPE *poly) |
81 | { |
82 | VTYPE p89 = FMA (poly[9], x, poly[8]); |
83 | VTYPE p8_10 = FMA (poly[10], x2, p89); |
84 | return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
85 | } |
86 | static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
87 | const VTYPE *poly) |
88 | { |
89 | VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8); |
90 | return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly)); |
91 | } |
92 | static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
93 | const VTYPE *poly) |
94 | { |
95 | return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8, |
96 | VWRAP (estrin_7) (x, x2, x4, poly)); |
97 | } |
98 | static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
99 | const VTYPE *poly) |
100 | { |
101 | return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8, |
102 | VWRAP (estrin_7) (x, x2, x4, poly)); |
103 | } |
104 | static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
105 | const VTYPE *poly) |
106 | { |
107 | return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8, |
108 | VWRAP (estrin_7) (x, x2, x4, poly)); |
109 | } |
110 | static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
111 | const VTYPE *poly) |
112 | { |
113 | return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8, |
114 | VWRAP (estrin_7) (x, x2, x4, poly)); |
115 | } |
116 | static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
117 | VTYPE x16, const VTYPE *poly) |
118 | { |
119 | return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
120 | } |
121 | static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
122 | VTYPE x16, const VTYPE *poly) |
123 | { |
124 | VTYPE p16_17 = FMA (poly[17], x, poly[16]); |
125 | return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
126 | } |
127 | static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
128 | VTYPE x16, const VTYPE *poly) |
129 | { |
130 | VTYPE p16_17 = FMA (poly[17], x, poly[16]); |
131 | VTYPE p16_18 = FMA (poly[18], x2, p16_17); |
132 | return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
133 | } |
134 | static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, |
135 | VTYPE x16, const VTYPE *poly) |
136 | { |
137 | VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16); |
138 | return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); |
139 | } |
140 | |
141 | static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly) |
142 | { |
143 | VTYPE p = FMA (poly[3], x, poly[2]); |
144 | p = FMA (x, p, poly[1]); |
145 | p = FMA (x, p, poly[0]); |
146 | return p; |
147 | } |
148 | static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly) |
149 | { |
150 | VTYPE p = FMA (poly[4], x, poly[3]); |
151 | p = FMA (x, p, poly[2]); |
152 | p = FMA (x, p, poly[1]); |
153 | p = FMA (x, p, poly[0]); |
154 | return p; |
155 | } |
156 | static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly) |
157 | { |
158 | return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]); |
159 | } |
160 | static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly) |
161 | { |
162 | return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]); |
163 | } |
164 | static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly) |
165 | { |
166 | return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]); |
167 | } |
168 | static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly) |
169 | { |
170 | return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]); |
171 | } |
172 | static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly) |
173 | { |
174 | return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]); |
175 | } |
176 | static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly) |
177 | { |
178 | return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]); |
179 | } |
180 | static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly) |
181 | { |
182 | return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]); |
183 | } |
184 | static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly) |
185 | { |
186 | return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]); |
187 | } |
188 | |
189 | static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly) |
190 | { |
191 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
192 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
193 | VTYPE p; |
194 | p = FMA (x2, poly[4], p23); |
195 | p = FMA (x2, p, p01); |
196 | return p; |
197 | } |
198 | static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly) |
199 | { |
200 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
201 | VTYPE p23 = FMA (poly[3], x, poly[2]); |
202 | VTYPE p45 = FMA (poly[5], x, poly[4]); |
203 | VTYPE p; |
204 | p = FMA (x2, p45, p23); |
205 | p = FMA (x2, p, p01); |
206 | return p; |
207 | } |
208 | static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly) |
209 | { |
210 | VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2); |
211 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
212 | return FMA (x2, p26, p01); |
213 | } |
214 | static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly) |
215 | { |
216 | VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2); |
217 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
218 | return FMA (x2, p27, p01); |
219 | } |
220 | static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly) |
221 | { |
222 | VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2); |
223 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
224 | return FMA (x2, p28, p01); |
225 | } |
226 | static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly) |
227 | { |
228 | VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2); |
229 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
230 | return FMA (x2, p29, p01); |
231 | } |
232 | static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly) |
233 | { |
234 | VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2); |
235 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
236 | return FMA (x2, p2_10, p01); |
237 | } |
238 | static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly) |
239 | { |
240 | VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2); |
241 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
242 | return FMA (x2, p2_11, p01); |
243 | } |
244 | static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly) |
245 | { |
246 | VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2); |
247 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
248 | return FMA (x2, p2_12, p01); |
249 | } |
250 | static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly) |
251 | { |
252 | VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2); |
253 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
254 | return FMA (x2, p2_13, p01); |
255 | } |
256 | static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly) |
257 | { |
258 | VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2); |
259 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
260 | return FMA (x2, p2_14, p01); |
261 | } |
262 | static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly) |
263 | { |
264 | VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2); |
265 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
266 | return FMA (x2, p2_15, p01); |
267 | } |
268 | static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly) |
269 | { |
270 | VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2); |
271 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
272 | return FMA (x2, p2_16, p01); |
273 | } |
274 | static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly) |
275 | { |
276 | VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2); |
277 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
278 | return FMA (x2, p2_17, p01); |
279 | } |
280 | static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly) |
281 | { |
282 | VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2); |
283 | VTYPE p01 = FMA (poly[1], x, poly[0]); |
284 | return FMA (x2, p2_18, p01); |
285 | } |
286 | |