1 | //=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// Sets the source and destination precision parameters used by the
// floating-point truncation routines.
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef FP_TRUNC_HEADER |
// Include-guard macro; must match the name tested by the preceding #ifndef.
#define FP_TRUNC_HEADER
15 | |
16 | #include "int_lib.h" |
17 | |
18 | #if defined SRC_SINGLE |
19 | typedef float src_t; |
20 | typedef uint32_t src_rep_t; |
21 | #define SRC_REP_C UINT32_C |
22 | static const int srcBits = sizeof(src_t) * CHAR_BIT; |
23 | static const int srcSigFracBits = 23; |
24 | // -1 accounts for the sign bit. |
25 | // srcBits - srcSigFracBits - 1 |
26 | static const int srcExpBits = 8; |
27 | |
28 | #elif defined SRC_DOUBLE |
29 | typedef double src_t; |
30 | typedef uint64_t src_rep_t; |
31 | #define SRC_REP_C UINT64_C |
32 | static const int srcBits = sizeof(src_t) * CHAR_BIT; |
33 | static const int srcSigFracBits = 52; |
34 | // -1 accounts for the sign bit. |
35 | // srcBits - srcSigFracBits - 1 |
36 | static const int srcExpBits = 11; |
37 | |
38 | #elif defined SRC_80 |
39 | typedef xf_float src_t; |
40 | typedef __uint128_t src_rep_t; |
41 | #define SRC_REP_C (__uint128_t) |
42 | // sign bit, exponent and significand occupy the lower 80 bits. |
43 | static const int srcBits = 80; |
44 | static const int srcSigFracBits = 63; |
45 | // -1 accounts for the sign bit. |
46 | // -1 accounts for the explicitly stored integer bit. |
47 | // srcBits - srcSigFracBits - 1 - 1 |
48 | static const int srcExpBits = 15; |
49 | |
50 | #elif defined SRC_QUAD |
51 | typedef tf_float src_t; |
52 | typedef __uint128_t src_rep_t; |
53 | #define SRC_REP_C (__uint128_t) |
54 | static const int srcBits = sizeof(src_t) * CHAR_BIT; |
55 | static const int srcSigFracBits = 112; |
56 | // -1 accounts for the sign bit. |
57 | // srcBits - srcSigFracBits - 1 |
58 | static const int srcExpBits = 15; |
59 | |
60 | #else |
61 | #error Source should be double precision or quad precision! |
62 | #endif // end source precision |
63 | |
64 | #if defined DST_DOUBLE |
65 | typedef double dst_t; |
66 | typedef uint64_t dst_rep_t; |
67 | #define DST_REP_C UINT64_C |
68 | static const int dstBits = sizeof(dst_t) * CHAR_BIT; |
69 | static const int dstSigFracBits = 52; |
70 | // -1 accounts for the sign bit. |
71 | // dstBits - dstSigFracBits - 1 |
72 | static const int dstExpBits = 11; |
73 | |
74 | #elif defined DST_80 |
75 | typedef xf_float dst_t; |
76 | typedef __uint128_t dst_rep_t; |
77 | #define DST_REP_C (__uint128_t) |
78 | static const int dstBits = 80; |
79 | static const int dstSigFracBits = 63; |
80 | // -1 accounts for the sign bit. |
81 | // -1 accounts for the explicitly stored integer bit. |
82 | // dstBits - dstSigFracBits - 1 - 1 |
83 | static const int dstExpBits = 15; |
84 | |
85 | #elif defined DST_SINGLE |
86 | typedef float dst_t; |
87 | typedef uint32_t dst_rep_t; |
88 | #define DST_REP_C UINT32_C |
89 | static const int dstBits = sizeof(dst_t) * CHAR_BIT; |
90 | static const int dstSigFracBits = 23; |
91 | // -1 accounts for the sign bit. |
92 | // dstBits - dstSigFracBits - 1 |
93 | static const int dstExpBits = 8; |
94 | |
95 | #elif defined DST_HALF |
96 | #ifdef COMPILER_RT_HAS_FLOAT16 |
97 | typedef _Float16 dst_t; |
98 | #else |
99 | typedef uint16_t dst_t; |
100 | #endif |
101 | typedef uint16_t dst_rep_t; |
102 | #define DST_REP_C UINT16_C |
103 | static const int dstBits = sizeof(dst_t) * CHAR_BIT; |
104 | static const int dstSigFracBits = 10; |
105 | // -1 accounts for the sign bit. |
106 | // dstBits - dstSigFracBits - 1 |
107 | static const int dstExpBits = 5; |
108 | |
109 | #elif defined DST_BFLOAT |
110 | typedef __bf16 dst_t; |
111 | typedef uint16_t dst_rep_t; |
112 | #define DST_REP_C UINT16_C |
113 | static const int dstBits = sizeof(dst_t) * CHAR_BIT; |
114 | static const int dstSigFracBits = 7; |
115 | // -1 accounts for the sign bit. |
116 | // dstBits - dstSigFracBits - 1 |
117 | static const int dstExpBits = 8; |
118 | |
119 | #else |
120 | #error Destination should be single precision or double precision! |
121 | #endif // end destination precision |
122 | |
123 | // TODO: These helper routines should be placed into fp_lib.h |
124 | // Currently they depend on macros/constants defined above. |
125 | |
126 | static inline src_rep_t (src_rep_t x) { |
127 | const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); |
128 | return (x & srcSignMask) >> (srcBits - 1); |
129 | } |
130 | |
131 | static inline src_rep_t (src_rep_t x) { |
132 | const int srcSigBits = srcBits - 1 - srcExpBits; |
133 | const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; |
134 | return (x & srcExpMask) >> srcSigBits; |
135 | } |
136 | |
137 | static inline src_rep_t (src_rep_t x) { |
138 | const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; |
139 | return x & srcSigFracMask; |
140 | } |
141 | |
142 | static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { |
143 | dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; |
144 | // Set the explicit integer bit in F80 if present. |
145 | if (dstBits == 80 && exp) { |
146 | result |= (DST_REP_C(1) << dstSigFracBits); |
147 | } |
148 | return result; |
149 | } |
150 | |
151 | // End of specialization parameters. Two helper routines for conversion to and |
152 | // from the representation of floating-point data as integer values follow. |
153 | |
154 | static inline src_rep_t srcToRep(src_t x) { |
155 | const union { |
156 | src_t f; |
157 | src_rep_t i; |
158 | } rep = {.f = x}; |
159 | return rep.i; |
160 | } |
161 | |
162 | static inline dst_t dstFromRep(dst_rep_t x) { |
163 | const union { |
164 | dst_t f; |
165 | dst_rep_t i; |
166 | } rep = {.i = x}; |
167 | return rep.f; |
168 | } |
169 | |
170 | #endif // FP_TRUNC_HEADER |
171 | |