1 | //=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Set source and destination precision setting |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef FP_TRUNC_HEADER |
14 | #define |
15 | |
16 | #include "int_lib.h" |
17 | |
// Source format selection. The first of SRC_SINGLE, SRC_DOUBLE or SRC_QUAD
// that is defined picks the branch; if none is defined the build stops with
// #error. Every branch provides the same set of names:
//   src_t          - floating-point type being converted from
//   src_rep_t      - unsigned integer type used to hold its bit pattern
//   SRC_REP_C(x)   - spells an integer constant usable in src_rep_t arithmetic
//   srcBits        - total width of the format, in bits
//   srcSigFracBits - number of stored significand fraction bits
//   srcExpBits     - number of exponent bits
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// Exponent width is srcBits - srcSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int srcExpBits = 8;

#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// Exponent width is srcBits - srcSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int srcExpBits = 11;

#elif defined SRC_QUAD
// tf_float is not declared in this header; it has to be in scope already
// (presumably via int_lib.h - confirm).
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
// There is no 128-bit literal suffix, so the "constant" macro is a cast
// prefix: SRC_REP_C(1) expands to (__uint128_t)(1).
#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 112;
// Exponent width is srcBits - srcSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int srcExpBits = 15;

#else
#error Source should be single, double, or quad precision!
#endif // end source precision
51 | |
// Destination format selection. The first of DST_DOUBLE, DST_80, DST_SINGLE,
// DST_HALF or DST_BFLOAT that is defined picks the branch; if none is defined
// the build stops with #error. Every branch provides the same set of names:
//   dst_t          - floating-point type being converted to
//   dst_rep_t      - unsigned integer type used to hold its bit pattern
//   DST_REP_C(x)   - spells an integer constant usable in dst_rep_t arithmetic
//   dstBits        - total width of the format, in bits
//   dstSigFracBits - number of stored significand fraction bits
//   dstExpBits     - number of exponent bits
#if defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// Exponent width is dstBits - dstSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int dstExpBits = 11;

#elif defined DST_80
// xf_float is not declared in this header; it has to be in scope already
// (presumably via int_lib.h - confirm).
typedef xf_float dst_t;
typedef __uint128_t dst_rep_t;
// Cast prefix rather than a literal suffix: DST_REP_C(1) expands to
// (__uint128_t)(1).
#define DST_REP_C (__uint128_t)
// The width is spelled out as 80 instead of being derived from sizeof(dst_t):
// only 80 bits of the representation carry the value.
static const int dstBits = 80;
static const int dstSigFracBits = 63;
// Exponent width is dstBits - dstSigFracBits - 1 - 1: one -1 for the sign bit
// and one -1 for the explicitly stored integer bit.
static const int dstExpBits = 15;

#elif defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// Exponent width is dstBits - dstSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int dstExpBits = 8;

#elif defined DST_HALF
// Without _Float16 support the result is handed back as a raw 16-bit integer
// of the same size.
#ifdef COMPILER_RT_HAS_FLOAT16
typedef _Float16 dst_t;
#else
typedef uint16_t dst_t;
#endif
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 10;
// Exponent width is dstBits - dstSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int dstExpBits = 5;

#elif defined DST_BFLOAT
typedef __bf16 dst_t;
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 7;
// Exponent width is dstBits - dstSigFracBits - 1; the -1 accounts for the
// sign bit.
static const int dstExpBits = 8;

#else
#error Destination should be double, 80-bit extended, single, half, or bfloat16 precision!
#endif // end destination precision
110 | |
111 | // TODO: These helper routines should be placed into fp_lib.h |
112 | // Currently they depend on macros/constants defined above. |
113 | |
114 | static inline src_rep_t (src_rep_t x) { |
115 | const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); |
116 | return (x & srcSignMask) >> (srcBits - 1); |
117 | } |
118 | |
119 | static inline src_rep_t (src_rep_t x) { |
120 | const int srcSigBits = srcBits - 1 - srcExpBits; |
121 | const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; |
122 | return (x & srcExpMask) >> srcSigBits; |
123 | } |
124 | |
125 | static inline src_rep_t (src_rep_t x) { |
126 | const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; |
127 | return x & srcSigFracMask; |
128 | } |
129 | |
130 | static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { |
131 | dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; |
132 | // Set the explicit integer bit in F80 if present. |
133 | if (dstBits == 80 && exp) { |
134 | result |= (DST_REP_C(1) << dstSigFracBits); |
135 | } |
136 | return result; |
137 | } |
138 | |
139 | // End of specialization parameters. Two helper routines for conversion to and |
140 | // from the representation of floating-point data as integer values follow. |
141 | |
142 | static inline src_rep_t srcToRep(src_t x) { |
143 | const union { |
144 | src_t f; |
145 | src_rep_t i; |
146 | } rep = {.f = x}; |
147 | return rep.i; |
148 | } |
149 | |
150 | static inline dst_t dstFromRep(dst_rep_t x) { |
151 | const union { |
152 | dst_t f; |
153 | dst_rep_t i; |
154 | } rep = {.i = x}; |
155 | return rep.f; |
156 | } |
157 | |
158 | #endif // FP_TRUNC_HEADER |
159 | |