1/* fp16.c
2 *
3 * Copyright 2021 Red Hat, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * SPDX-License-Identifier: LGPL-2.1-or-later
19 */
20
21#include "config.h"
22
23#include "fp16private.h"
24
25static inline guint
26as_uint (const float x)
27{
28 return *(guint*)&x;
29}
30
31static inline float
32as_float (const guint x)
33{
34 return *(float*)&x;
35}
36
37// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
38
39static inline float
40half_to_float_one (const guint16 x)
41{
42 const guint e = (x&0x7C00)>>10; // exponent
43 const guint m = (x&0x03FF)<<13; // mantissa
44 const guint v = as_uint(x: (float)m)>>23;
45 return as_float(x: (x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000)));
46}
47
48static inline guint16
49float_to_half_one (const float x)
50{
51 const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
52 const guint e = (b&0x7F800000)>>23; // exponent
53 const guint m = b&0x007FFFFF; // mantissa
54 return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
55}
56
57void
58float_to_half4_c (const float f[4],
59 guint16 h[4])
60{
61 h[0] = float_to_half_one (x: f[0]);
62 h[1] = float_to_half_one (x: f[1]);
63 h[2] = float_to_half_one (x: f[2]);
64 h[3] = float_to_half_one (x: f[3]);
65}
66
67void
68half_to_float4_c (const guint16 h[4],
69 float f[4])
70{
71 f[0] = half_to_float_one (x: h[0]);
72 f[1] = half_to_float_one (x: h[1]);
73 f[2] = half_to_float_one (x: h[2]);
74 f[3] = half_to_float_one (x: h[3]);
75}
76
77void
78float_to_half_c (const float *f,
79 guint16 *h,
80 int n)
81{
82 for (int i = 0; i < n; i++)
83 h[i] = float_to_half_one (x: f[i]);
84}
85
86void
87half_to_float_c (const guint16 *h,
88 float *f,
89 int n)
90{
91 for (int i = 0; i < n; i++)
92 f[i] = half_to_float_one (x: h[i]);
93}
94
95#ifdef HAVE_F16C
96
97#if defined(_MSC_VER) && !defined(__clang__)
98/* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
99static gboolean
100have_f16c_msvc (void)
101{
102 static gboolean result = FALSE;
103 static gsize inited = 0;
104
105 if (g_once_init_enter (&inited))
106 {
107 int cpuinfo[4] = { -1 };
108
109 __cpuid (cpuinfo, 0);
110
111 if (cpuinfo[0] > 0)
112 {
113 __cpuid (cpuinfo, 1);
114
115 if ((cpuinfo[2] & 0x8000000) != 0)
116 result = (cpuinfo[2] & 0x20000000) != 0;
117 }
118
119 g_once_init_leave (&inited, 1);
120 }
121
122 return result;
123}
124
125void
126float_to_half4 (const float f[4], guint16 h[4])
127{
128 if (have_f16c_msvc ())
129 float_to_half4_f16c (f, h);
130 else
131 float_to_half4_c (f, h);
132}
133
134void
135half_to_float4 (const guint16 h[4], float f[4])
136{
137 if (have_f16c_msvc ())
138 half_to_float4_f16c (h, f);
139 else
140 half_to_float4_c (h, f);
141}
142
143void
144float_to_half (const float *f, guint16 *h, int n)
145{
146 if (have_f16c_msvc ())
147 float_to_half_f16c (f, h, n);
148 else
149 float_to_half_c (f, h, n);
150}
151
152void
153half_to_float (const guint16 *h, float *f, int n)
154{
155 if (have_f16c_msvc ())
156 half_to_float_f16c (h, f, n);
157 else
158 half_to_float_c (h, f, n);
159}
160
161#else
162
/* The public entry points are declared as indirect functions: each
 * symbol is bound to whatever implementation the named resolver below
 * returns, so the F16C-or-C choice is made by the resolver rather than
 * on every call.
 */
void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
void float_to_half (const float *f, guint16 *h, int n) __attribute__((ifunc ("resolve_float_to_half")));
void half_to_float (const guint16 *h, float *f, int n) __attribute__((ifunc ("resolve_half_to_float")));
167
168static void *
169resolve_float_to_half4 (void)
170{
171 __builtin_cpu_init ();
172 if (__builtin_cpu_supports ("f16c"))
173 return float_to_half4_f16c;
174 else
175 return float_to_half4_c;
176}
177
178static void *
179resolve_half_to_float4 (void)
180{
181 __builtin_cpu_init ();
182 if (__builtin_cpu_supports ("f16c"))
183 return half_to_float4_f16c;
184 else
185 return half_to_float4_c;
186}
187
188static void *
189resolve_float_to_half (void)
190{
191 __builtin_cpu_init ();
192 if (__builtin_cpu_supports ("f16c"))
193 return float_to_half_f16c;
194 else
195 return float_to_half_c;
196}
197
198static void *
199resolve_half_to_float (void)
200{
201 __builtin_cpu_init ();
202 if (__builtin_cpu_supports ("f16c"))
203 return half_to_float_f16c;
204 else
205 return half_to_float_c;
206}
207
208#endif
209
210#else /* ! HAVE_F16C */
211
212#if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
213// turns out aliases don't work on Darwin nor Visual Studio
214
215void
216float_to_half4 (const float f[4],
217 guint16 h[4])
218{
219 float_to_half4_c (f, h);
220}
221
222void
223half_to_float4 (const guint16 h[4],
224 float f[4])
225{
226 half_to_float4_c (h, f);
227}
228
229void
230float_to_half (const float *f,
231 guint16 *h,
232 int n)
233{
234 float_to_half_c (f, h, n);
235}
236
237void
238half_to_float (const guint16 *h,
239 float *f,
240 int n)
241{
242 half_to_float_c (h, f, n);
243}
244
245#else
246
/* Without F16C support compiled in there is nothing to choose between,
 * so the public names are declared as aliases of the portable C
 * implementations defined earlier in this file.
 */
void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c")));
void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c")));
void float_to_half (const float *f, guint16 *h, int n) __attribute__((alias ("float_to_half_c")));
void half_to_float (const guint16 *h, float *f, int n) __attribute__((alias ("half_to_float_c")));
251
252#endif
253
254#endif /* HAVE_F16C */
255

source code of gtk/gsk/gl/fp16.c