1 | #ifndef X86_FENV_PRIVATE_H |
2 | #define X86_FENV_PRIVATE_H 1 |
3 | |
4 | #include <bits/floatn.h> |
5 | #include <fenv.h> |
6 | #include <fpu_control.h> |
7 | |
/* This file is used by both the 32- and 64-bit ports.  The 64-bit port
   has a field in the fenv_t for the mxcsr; the 32-bit port does not.
   Instead, we (ab)use the only 32-bit field extant in the struct.
   The alias is local to this header: it is removed again by the
   #undef placed just before the #include_next at the end of the file.  */
#ifndef __x86_64__
# define __mxcsr __eip
#endif
14 | |
15 | |
16 | /* All of these functions are private to libm, and are all used in pairs |
17 | to save+change the fp state and restore the original state. Thus we |
18 | need not care for both the 387 and the sse unit, only the one we're |
19 | actually using. */ |
20 | |
/* Mnemonics used by the inline asm in this file to store (STMXCSR) and
   load (LDMXCSR) the MXCSR register.  The "v"-prefixed spellings are
   selected when __AVX__ or SSE2AVX is defined; otherwise the plain
   spellings are used.  */
#if defined __AVX__ || defined SSE2AVX
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#else
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#endif
28 | |
29 | static __always_inline void |
30 | libc_feholdexcept_sse (fenv_t *e) |
31 | { |
32 | unsigned int mxcsr; |
33 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
34 | e->__mxcsr = mxcsr; |
35 | mxcsr = (mxcsr | 0x1f80) & ~0x3f; |
36 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
37 | } |
38 | |
/* Store the x87 environment into *E with fnstenv and then clear the
   pending x87 exception flags with fnclex.  */
static __always_inline void
libc_feholdexcept_387 (fenv_t *e)
{
  /* Recall that fnstenv has a side-effect of masking exceptions.
     Clobber all of the fp registers so that the TOS field is 0.  */
  asm volatile ("fnstenv %0; fnclex"
                : "=m" (*e)
                : : "st" , "st(1)" , "st(2)" , "st(3)" ,
                    "st(4)" , "st(5)" , "st(6)" , "st(7)" );
}
49 | |
50 | static __always_inline void |
51 | libc_fesetround_sse (int r) |
52 | { |
53 | unsigned int mxcsr; |
54 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
55 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
56 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
57 | } |
58 | |
/* Install rounding mode R in the x87 control word.  Only the 0xc00
   field is replaced; all other control bits keep their value.  */
static __always_inline void
libc_fesetround_387 (int r)
{
  fpu_control_t control;

  _FPU_GETCW (control);
  control &= ~0xc00;
  control |= r;
  _FPU_SETCW (control);
}
67 | |
68 | static __always_inline void |
69 | libc_feholdexcept_setround_sse (fenv_t *e, int r) |
70 | { |
71 | unsigned int mxcsr; |
72 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
73 | e->__mxcsr = mxcsr; |
74 | mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
75 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
76 | } |
77 | |
/* Hold x87 exceptions and install both rounding mode and precision.
   R carries the rounding-control and precision-control bits already
   or-ed together; the environment in effect on entry is saved in *E.
   Shared helper for libc_feholdexcept_setround_387 and
   libc_feholdexcept_setround_387_53bit.  */
static __always_inline void
libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
{
  fpu_control_t control;

  libc_feholdexcept_387 (e);

  /* Build the new control word from the one just stored in *E: drop
     the old rounding and precision fields, then add R and the low six
     bits (0x3f).  */
  control = e->__control_word;
  control &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  control |= 0x3f;
  control |= r;
  _FPU_SETCW (control);
}
90 | |
/* Hold x87 exceptions and select rounding mode R with the precision
   field set to _FPU_EXTENDED.  The previous environment is saved in
   *E.  */
static __always_inline void
libc_feholdexcept_setround_387 (fenv_t *e, int r)
{
  libc_feholdexcept_setround_387_prec (e, r | _FPU_EXTENDED);
}
96 | |
/* Hold x87 exceptions and select rounding mode R with the precision
   field set to _FPU_DOUBLE.  The previous environment is saved in
   *E.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
{
  libc_feholdexcept_setround_387_prec (e, r | _FPU_DOUBLE);
}
102 | |
103 | static __always_inline int |
104 | libc_fetestexcept_sse (int e) |
105 | { |
106 | unsigned int mxcsr; |
107 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
108 | return mxcsr & e & FE_ALL_EXCEPT; |
109 | } |
110 | |
111 | static __always_inline int |
112 | libc_fetestexcept_387 (int ex) |
113 | { |
114 | fexcept_t temp; |
115 | asm volatile ("fnstsw %0" : "=a" (temp)); |
116 | return temp & ex & FE_ALL_EXCEPT; |
117 | } |
118 | |
/* Load MXCSR from the value stored in E->__mxcsr.  The whole register
   is replaced; nothing from the current MXCSR is kept.  */
static __always_inline void
libc_fesetenv_sse (fenv_t *e)
{
  asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
}
124 | |
/* Reload the x87 environment from *E with fldenv.  */
static __always_inline void
libc_fesetenv_387 (fenv_t *e)
{
  /* Clobber all fp registers so that the TOS value we saved earlier is
     compatible with the current state of the compiler.  */
  asm volatile ("fldenv %0"
                : : "m" (*e)
                : "st" , "st(1)" , "st(2)" , "st(3)" ,
                  "st(4)" , "st(5)" , "st(6)" , "st(7)" );
}
135 | |
136 | static __always_inline int |
137 | libc_feupdateenv_test_sse (fenv_t *e, int ex) |
138 | { |
139 | unsigned int mxcsr, old_mxcsr, cur_ex; |
140 | asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); |
141 | cur_ex = mxcsr & FE_ALL_EXCEPT; |
142 | |
143 | /* Merge current exceptions with the old environment. */ |
144 | old_mxcsr = e->__mxcsr; |
145 | mxcsr = old_mxcsr | cur_ex; |
146 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
147 | |
148 | /* Raise SIGFPE for any new exceptions since the hold. Expect that |
149 | the normal environment has all exceptions masked. */ |
150 | if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex)) |
151 | __feraiseexcept (excepts: cur_ex); |
152 | |
153 | /* Test for exceptions raised since the hold. */ |
154 | return cur_ex & ex; |
155 | } |
156 | |
157 | static __always_inline int |
158 | libc_feupdateenv_test_387 (fenv_t *e, int ex) |
159 | { |
160 | fexcept_t cur_ex; |
161 | |
162 | /* Save current exceptions. */ |
163 | asm volatile ("fnstsw %0" : "=a" (cur_ex)); |
164 | cur_ex &= FE_ALL_EXCEPT; |
165 | |
166 | /* Reload original environment. */ |
167 | libc_fesetenv_387 (e); |
168 | |
169 | /* Merge current exceptions. */ |
170 | __feraiseexcept (excepts: cur_ex); |
171 | |
172 | /* Test for exceptions raised since the hold. */ |
173 | return cur_ex & ex; |
174 | } |
175 | |
/* Restore the SSE environment saved in *E, merging in the exceptions
   currently flagged.  Same as libc_feupdateenv_test_sse with nothing
   to test; the result is discarded.  */
static __always_inline void
libc_feupdateenv_sse (fenv_t *e)
{
  libc_feupdateenv_test_sse (e, 0);
}
181 | |
/* Restore the x87 environment saved in *E, merging in the exceptions
   currently flagged.  Same as libc_feupdateenv_test_387 with nothing
   to test; the result is discarded.  */
static __always_inline void
libc_feupdateenv_387 (fenv_t *e)
{
  libc_feupdateenv_test_387 (e, 0);
}
187 | |
188 | static __always_inline void |
189 | libc_feholdsetround_sse (fenv_t *e, int r) |
190 | { |
191 | unsigned int mxcsr; |
192 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
193 | e->__mxcsr = mxcsr; |
194 | mxcsr = (mxcsr & ~0x6000) | (r << 3); |
195 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
196 | } |
197 | |
/* Save the current x87 control word in E->__control_word and install
   the rounding-control and precision-control bits given in R.  All
   other control bits keep their value.  */
static __always_inline void
libc_feholdsetround_387_prec (fenv_t *e, int r)
{
  fpu_control_t saved, updated;

  _FPU_GETCW (saved);
  e->__control_word = saved;

  updated = (saved & ~(_FPU_RC_ZERO | _FPU_EXTENDED)) | r;
  _FPU_SETCW (updated);
}
209 | |
/* Save the x87 control word in *E and select rounding mode R with the
   precision field set to _FPU_EXTENDED.  */
static __always_inline void
libc_feholdsetround_387 (fenv_t *e, int r)
{
  libc_feholdsetround_387_prec (e, r | _FPU_EXTENDED);
}
215 | |
/* Save the x87 control word in *E and select rounding mode R with the
   precision field set to _FPU_DOUBLE.  */
static __always_inline void
libc_feholdsetround_387_53bit (fenv_t *e, int r)
{
  libc_feholdsetround_387_prec (e, r | _FPU_DOUBLE);
}
221 | |
222 | static __always_inline void |
223 | libc_feresetround_sse (fenv_t *e) |
224 | { |
225 | unsigned int mxcsr; |
226 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
227 | mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); |
228 | asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); |
229 | } |
230 | |
/* Reload the x87 control word from the value recorded in
   E->__control_word.  The whole control word is restored.  */
static __always_inline void
libc_feresetround_387 (fenv_t *e)
{
  fpu_control_t saved_cw = e->__control_word;

  _FPU_SETCW (saved_cw);
}
236 | |
/* Map the "f"-suffixed helper names onto one implementation family:
   the _sse functions when __SSE_MATH__ is defined, the _387 functions
   otherwise.  */
#ifdef __SSE_MATH__
# define libc_feholdexceptf libc_feholdexcept_sse
# define libc_fesetroundf libc_fesetround_sse
# define libc_feholdexcept_setroundf libc_feholdexcept_setround_sse
# define libc_fetestexceptf libc_fetestexcept_sse
# define libc_fesetenvf libc_fesetenv_sse
# define libc_feupdateenv_testf libc_feupdateenv_test_sse
# define libc_feupdateenvf libc_feupdateenv_sse
# define libc_feholdsetroundf libc_feholdsetround_sse
# define libc_feresetroundf libc_feresetround_sse
#else
# define libc_feholdexceptf libc_feholdexcept_387
# define libc_fesetroundf libc_fesetround_387
# define libc_feholdexcept_setroundf libc_feholdexcept_setround_387
# define libc_fetestexceptf libc_fetestexcept_387
# define libc_fesetenvf libc_fesetenv_387
# define libc_feupdateenv_testf libc_feupdateenv_test_387
# define libc_feupdateenvf libc_feupdateenv_387
# define libc_feholdsetroundf libc_feholdsetround_387
# define libc_feresetroundf libc_feresetround_387
#endif /* __SSE_MATH__ */
258 | |
/* Map the unsuffixed helper names onto one implementation family: the
   _sse functions when __SSE2_MATH__ is defined, the _387 functions
   otherwise.  */
#ifdef __SSE2_MATH__
# define libc_feholdexcept libc_feholdexcept_sse
# define libc_fesetround libc_fesetround_sse
# define libc_feholdexcept_setround libc_feholdexcept_setround_sse
# define libc_fetestexcept libc_fetestexcept_sse
# define libc_fesetenv libc_fesetenv_sse
# define libc_feupdateenv_test libc_feupdateenv_test_sse
# define libc_feupdateenv libc_feupdateenv_sse
# define libc_feholdsetround libc_feholdsetround_sse
# define libc_feresetround libc_feresetround_sse
#else
# define libc_feholdexcept libc_feholdexcept_387
# define libc_fesetround libc_fesetround_387
# define libc_feholdexcept_setround libc_feholdexcept_setround_387
# define libc_fetestexcept libc_fetestexcept_387
# define libc_fesetenv libc_fesetenv_387
# define libc_feupdateenv_test libc_feupdateenv_test_387
# define libc_feupdateenv libc_feupdateenv_387
# define libc_feholdsetround libc_feholdsetround_387
# define libc_feresetround libc_feresetround_387
#endif /* __SSE2_MATH__ */
280 | |
/* The "l"-suffixed helper names always use the _387 functions,
   whatever the SSE configuration is.  */
#define libc_feholdexceptl libc_feholdexcept_387
#define libc_fesetroundl libc_fesetround_387
#define libc_feholdexcept_setroundl libc_feholdexcept_setround_387
#define libc_fetestexceptl libc_fetestexcept_387
#define libc_fesetenvl libc_fesetenv_387
#define libc_feupdateenv_testl libc_feupdateenv_test_387
#define libc_feupdateenvl libc_feupdateenv_387
#define libc_feholdsetroundl libc_feholdsetround_387
#define libc_feresetroundl libc_feresetround_387
290 | |
/* The _53bit variants (x87 precision field set to _FPU_DOUBLE) are
   only provided here when __SSE2_MATH__ is not defined.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit
# define libc_feholdsetround_53bit libc_feholdsetround_387_53bit
#endif
295 | |
/* Helper selection for the "f128" names.  Note that
   SET_RESTORE_ROUNDF128 is only defined in the x86_64 branch.  */
#ifdef __x86_64__
/* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
   x86_64, so that must be set for float128 computations.  */
# define SET_RESTORE_ROUNDF128(RM) \
  SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
# define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse
# define libc_feupdateenv_testf128 libc_feupdateenv_test_sse
#else
/* The 387 rounding mode is used by soft-fp for 32-bit, but whether
   387 or SSE exceptions are used depends on whether libgcc was built
   for SSE math, which is not known when glibc is being built.  */
# define libc_feholdexcept_setroundf128 default_libc_feholdexcept_setround
# define libc_feupdateenv_testf128 default_libc_feupdateenv_test
#endif
310 | |
/* We have support for rounding mode context: the *_ctx helpers defined
   below operate on a struct rm_ctx.  NOTE(review): the macro is
   presumably tested by the generic header pulled in by the
   #include_next at the end of this file -- confirm.  */
#define HAVE_RM_CTX 1
313 | |
314 | static __always_inline void |
315 | libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r) |
316 | { |
317 | unsigned int mxcsr, new_mxcsr; |
318 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
319 | new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); |
320 | |
321 | ctx->env.__mxcsr = mxcsr; |
322 | if (__glibc_unlikely (mxcsr != new_mxcsr)) |
323 | { |
324 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
325 | ctx->updated_status = true; |
326 | } |
327 | else |
328 | ctx->updated_status = false; |
329 | } |
330 | |
331 | /* Unconditional since we want to overwrite any exceptions that occurred in the |
332 | context. This is also why all fehold* functions unconditionally write into |
333 | ctx->env. */ |
334 | static __always_inline void |
335 | libc_fesetenv_sse_ctx (struct rm_ctx *ctx) |
336 | { |
337 | libc_fesetenv_sse (e: &ctx->env); |
338 | } |
339 | |
340 | static __always_inline void |
341 | libc_feupdateenv_sse_ctx (struct rm_ctx *ctx) |
342 | { |
343 | if (__glibc_unlikely (ctx->updated_status)) |
344 | libc_feupdateenv_test_sse (e: &ctx->env, ex: 0); |
345 | } |
346 | |
347 | static __always_inline void |
348 | libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r) |
349 | { |
350 | libc_feholdexcept_387 (e: &ctx->env); |
351 | |
352 | fpu_control_t cw = ctx->env.__control_word; |
353 | fpu_control_t old_cw = cw; |
354 | cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
355 | cw |= r | 0x3f; |
356 | |
357 | if (__glibc_unlikely (old_cw != cw)) |
358 | { |
359 | _FPU_SETCW (cw); |
360 | ctx->updated_status = true; |
361 | } |
362 | else |
363 | ctx->updated_status = false; |
364 | } |
365 | |
/* Context variant of libc_feholdexcept_setround_387: hold exceptions
   and select rounding mode R with the precision field set to
   _FPU_EXTENDED.  */
static __always_inline void
libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
{
  libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_EXTENDED);
}
371 | |
/* Context variant of libc_feholdexcept_setround_387_53bit: hold
   exceptions and select rounding mode R with the precision field set
   to _FPU_DOUBLE.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_DOUBLE);
}
377 | |
378 | static __always_inline void |
379 | libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r) |
380 | { |
381 | fpu_control_t cw, new_cw; |
382 | |
383 | _FPU_GETCW (cw); |
384 | new_cw = cw; |
385 | new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); |
386 | new_cw |= r; |
387 | |
388 | ctx->env.__control_word = cw; |
389 | if (__glibc_unlikely (new_cw != cw)) |
390 | { |
391 | _FPU_SETCW (new_cw); |
392 | ctx->updated_status = true; |
393 | } |
394 | else |
395 | ctx->updated_status = false; |
396 | } |
397 | |
/* Context variant of libc_feholdsetround_387: save the control word in
   CTX and select rounding mode R with the precision field set to
   _FPU_EXTENDED.  */
static __always_inline void
libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
{
  libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_EXTENDED);
}
403 | |
/* Context variant of libc_feholdsetround_387_53bit: save the control
   word in CTX and select rounding mode R with the precision field set
   to _FPU_DOUBLE.  */
static __always_inline void
libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_DOUBLE);
}
409 | |
410 | static __always_inline void |
411 | libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r) |
412 | { |
413 | unsigned int mxcsr, new_mxcsr; |
414 | |
415 | asm (STMXCSR " %0" : "=m" (*&mxcsr)); |
416 | new_mxcsr = (mxcsr & ~0x6000) | (r << 3); |
417 | |
418 | ctx->env.__mxcsr = mxcsr; |
419 | if (__glibc_unlikely (new_mxcsr != mxcsr)) |
420 | { |
421 | asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); |
422 | ctx->updated_status = true; |
423 | } |
424 | else |
425 | ctx->updated_status = false; |
426 | } |
427 | |
428 | static __always_inline void |
429 | libc_feresetround_sse_ctx (struct rm_ctx *ctx) |
430 | { |
431 | if (__glibc_unlikely (ctx->updated_status)) |
432 | libc_feresetround_sse (e: &ctx->env); |
433 | } |
434 | |
435 | static __always_inline void |
436 | libc_feresetround_387_ctx (struct rm_ctx *ctx) |
437 | { |
438 | if (__glibc_unlikely (ctx->updated_status)) |
439 | _FPU_SETCW (ctx->env.__control_word); |
440 | } |
441 | |
442 | static __always_inline void |
443 | libc_feupdateenv_387_ctx (struct rm_ctx *ctx) |
444 | { |
445 | if (__glibc_unlikely (ctx->updated_status)) |
446 | libc_feupdateenv_test_387 (e: &ctx->env, ex: 0); |
447 | } |
448 | |
/* Map the "f"-suffixed _ctx helper names onto the _sse_ctx functions
   when __SSE_MATH__ is defined and onto the _387_ctx functions
   otherwise.  libc_fesetenvf_ctx is only defined in the SSE branch;
   NOTE(review): in the 387 case it is presumably supplied by the
   generic header included at the end of this file -- confirm.  */
#ifdef __SSE_MATH__
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx
# define libc_fesetenvf_ctx libc_fesetenv_sse_ctx
# define libc_feupdateenvf_ctx libc_feupdateenv_sse_ctx
# define libc_feholdsetroundf_ctx libc_feholdsetround_sse_ctx
# define libc_feresetroundf_ctx libc_feresetround_sse_ctx
#else
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx
# define libc_feupdateenvf_ctx libc_feupdateenv_387_ctx
# define libc_feholdsetroundf_ctx libc_feholdsetround_387_ctx
# define libc_feresetroundf_ctx libc_feresetround_387_ctx
#endif /* __SSE_MATH__ */
461 | |
/* Map the unsuffixed _ctx helper names.  With __SSE2_MATH__ the
   _sse_ctx functions are used, except on non-x86_64 builds that define
   MATH_SET_BOTH_ROUNDING_MODES, which get the default_* versions
   instead.  Without __SSE2_MATH__ the _387_ctx functions are used and
   libc_fesetenv_ctx is not defined here.  */
#ifdef __SSE2_MATH__
# if defined (__x86_64__) || !defined (MATH_SET_BOTH_ROUNDING_MODES)
# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx
# define libc_fesetenv_ctx libc_fesetenv_sse_ctx
# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx
# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx
# define libc_feresetround_ctx libc_feresetround_sse_ctx
# else
# define libc_feholdexcept_setround_ctx default_libc_feholdexcept_setround_ctx
# define libc_fesetenv_ctx default_libc_fesetenv_ctx
# define libc_feupdateenv_ctx default_libc_feupdateenv_ctx
# define libc_feholdsetround_ctx default_libc_feholdsetround_ctx
# define libc_feresetround_ctx default_libc_feresetround_ctx
# endif
#else
# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx
# define libc_feupdateenv_ctx libc_feupdateenv_387_ctx
# define libc_feholdsetround_ctx libc_feholdsetround_387_ctx
# define libc_feresetround_ctx libc_feresetround_387_ctx
#endif /* __SSE2_MATH__ */
482 | |
/* The "l"-suffixed _ctx helper names always use the _387_ctx
   functions, whatever the SSE configuration is.  */
#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_387_ctx
#define libc_feupdateenvl_ctx libc_feupdateenv_387_ctx
#define libc_feholdsetroundl_ctx libc_feholdsetround_387_ctx
#define libc_feresetroundl_ctx libc_feresetround_387_ctx
487 | |
/* The _53bit _ctx variants are only provided here when __SSE2_MATH__
   is not defined.  Resetting uses the ordinary 387 reset, which
   reloads the whole saved control word.  */
#ifndef __SSE2_MATH__
# define libc_feholdsetround_53bit_ctx libc_feholdsetround_387_53bit_ctx
# define libc_feresetround_53bit_ctx libc_feresetround_387_ctx
#endif
492 | |
/* Drop the 32-bit __mxcsr -> __eip alias defined at the top of this
   file so that it does not leak into the header included next.  */
#undef __mxcsr
494 | |
495 | #include_next <fenv_private.h> |
496 | |
497 | #endif /* X86_FENV_PRIVATE_H */ |
498 | |