fp16.c source code [gtk/gsk/gl/fp16.c]

/* fp16.c
2	*
3	* Copyright 2021 Red Hat, Inc.
4	*
5	* This library is free software; you can redistribute it and/or
6	* modify it under the terms of the GNU Lesser General Public
7	* License as published by the Free Software Foundation; either
8	* version 2.1 of the License, or (at your option) any later version.
9	*
10	* This library is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	* Lesser General Public License for more details.
14	*
15	* You should have received a copy of the GNU Lesser General Public
16	* License along with this program. If not, see <http://www.gnu.org/licenses/>.
17	*
18	* SPDX-License-Identifier: LGPL-2.1-or-later
19	*/
20
21	#include "config.h"
22
23	#include "fp16private.h"
24
25	static inline guint
26	as_uint (const float x)
27	{
28	return (guint)&x;
29	}
30
31	static inline float
32	as_float (const guint x)
33	{
34	return (float**)&x;
35	}
36
37	// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
38
39	static inline float
40	half_to_float_one (const guint16 x)
41	{
42	const guint e = (x&`0x7C00`)>>`10`; // exponent
43	const guint m = (x&`0x03FF`)<<`13`; // mantissa
44	const guint v = as_uint(x: (float)m)>>`23`;
45	return as_float(x: (x&`0x8000`)<<`16` \| (e!=`0`)((e+`112`)<<`23`\|m) \| ((e==`0`)&(m!=`0`))((v-`37`)<<`23`\|((m<<(`150`-v))&`0x007FE000`)));
46	}
47
48	static inline guint16
49	float_to_half_one (const float x)
50	{
51	const guint b = as_uint(x)+`0x00001000`; // round-to-nearest-even
52	const guint e = (b&`0x7F800000`)>>`23`; // exponent
53	const guint m = b&`0x007FFFFF`; // mantissa
54	return (b&`0x80000000`)>>`16` \| (e>`112`)((((e-`112`)<<`10`)&`0x7C00`)\|m>>`13`) \| ((e<`113`)&(e>`101`))((((`0x007FF000`+m)>>(`125`-e))+`1`)>>`1`) \| (e>`143`)`0x7FFF`; // sign : normalized : denormalized : saturate*
55	}
56
57	void
58	float_to_half4_c (const float f[`4`],
59	guint16 h[`4`])
60	{
61	h[`0`] = float_to_half_one (x: f[`0`]);
62	h[`1`] = float_to_half_one (x: f[`1`]);
63	h[`2`] = float_to_half_one (x: f[`2`]);
64	h[`3`] = float_to_half_one (x: f[`3`]);
65	}
66
67	void
68	half_to_float4_c (const guint16 h[`4`],
69	float f[`4`])
70	{
71	f[`0`] = half_to_float_one (x: h[`0`]);
72	f[`1`] = half_to_float_one (x: h[`1`]);
73	f[`2`] = half_to_float_one (x: h[`2`]);
74	f[`3`] = half_to_float_one (x: h[`3`]);
75	}
76
77	void
78	float_to_half_c (const float *f,
79	guint16 *h,
80	int n)
81	{
82	for (int i = `0`; i < n; i++)
83	h[i] = float_to_half_one (x: f[i]);
84	}
85
86	void
87	half_to_float_c (const guint16 *h,
88	float *f,
89	int n)
90	{
91	for (int i = `0`; i < n; i++)
92	f[i] = half_to_float_one (x: h[i]);
93	}
94
95	#ifdef HAVE_F16C
96
97	#if defined(_MSC_VER) && !defined(__clang__)
98	/ based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ /
99	static gboolean
100	have_f16c_msvc (void)
101	{
102	static gboolean result = FALSE;
103	static gsize inited = `0`;
104
105	if (g_once_init_enter (&inited))
106	{
107	int cpuinfo[`4`] = { -`1` };
108
109	__cpuid (cpuinfo, `0`);
110
111	if (cpuinfo[`0`] > `0`)
112	{
113	__cpuid (cpuinfo, `1`);
114
115	if ((cpuinfo[`2`] & `0x8000000`) != `0`)
116	result = (cpuinfo[`2`] & `0x20000000`) != `0`;
117	}
118
119	g_once_init_leave (&inited, `1`);
120	}
121
122	return result;
123	}
124
125	void
126	float_to_half4 (const float f[`4`], guint16 h[`4`])
127	{
128	if (have_f16c_msvc ())
129	float_to_half4_f16c (f, h);
130	else
131	float_to_half4_c (f, h);
132	}
133
134	void
135	half_to_float4 (const guint16 h[`4`], float f[`4`])
136	{
137	if (have_f16c_msvc ())
138	half_to_float4_f16c (h, f);
139	else
140	half_to_float4_c (h, f);
141	}
142
143	void
144	float_to_half (const float f, guint16 h, int n)
145	{
146	if (have_f16c_msvc ())
147	float_to_half_f16c (f, h, n);
148	else
149	float_to_half_c (f, h, n);
150	}
151
152	void
153	half_to_float (const guint16 h, float* f, int* n)
154	{
155	if (have_f16c_msvc ())
156	half_to_float_f16c (h, f, n);
157	else
158	half_to_float_c (h, f, n);
159	}
160
161	#else
162
163	void float_to_half4 (const float f[`4`], guint16 h[`4`]) __attribute__((ifunc ("resolve_float_to_half4")));
164	void half_to_float4 (const guint16 h[`4`], float f[`4`]) __attribute__((ifunc ("resolve_half_to_float4")));
165	void float_to_half (const float f, guint16 h, int n) __attribute__((ifunc ("resolve_float_to_half")));
166	void half_to_float (const guint16 h, float* f, int* n) __attribute__((ifunc ("resolve_half_to_float")));
167
168	static void *
169	resolve_float_to_half4 (void)
170	{
171	__builtin_cpu_init ();
172	if (__builtin_cpu_supports ("f16c"))
173	return float_to_half4_f16c;
174	else
175	return float_to_half4_c;
176	}
177
178	static void *
179	resolve_half_to_float4 (void)
180	{
181	__builtin_cpu_init ();
182	if (__builtin_cpu_supports ("f16c"))
183	return half_to_float4_f16c;
184	else
185	return half_to_float4_c;
186	}
187
188	static void *
189	resolve_float_to_half (void)
190	{
191	__builtin_cpu_init ();
192	if (__builtin_cpu_supports ("f16c"))
193	return float_to_half_f16c;
194	else
195	return float_to_half_c;
196	}
197
198	static void *
199	resolve_half_to_float (void)
200	{
201	__builtin_cpu_init ();
202	if (__builtin_cpu_supports ("f16c"))
203	return half_to_float_f16c;
204	else
205	return half_to_float_c;
206	}
207
208	#endif
209
210	#else /* ! HAVE_F16C */
211
#if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
213	// turns out aliases don't work on Darwin nor Visual Studio
214
215	void
216	float_to_half4 (const float f[`4`],
217	guint16 h[`4`])
218	{
219	float_to_half4_c (f, h);
220	}
221
222	void
223	half_to_float4 (const guint16 h[`4`],
224	float f[`4`])
225	{
226	half_to_float4_c (h, f);
227	}
228
229	void
230	float_to_half (const float *f,
231	guint16 *h,
232	int n)
233	{
234	float_to_half_c (f, h, n);
235	}
236
237	void
238	half_to_float (const guint16 *h,
239	float *f,
240	int n)
241	{
242	half_to_float_c (h, f, n);
243	}
244
245	#else
246
247	void float_to_half4 (const float f[`4`], guint16 h[`4`]) __attribute__((alias ("float_to_half4_c")));
248	void half_to_float4 (const guint16 h[`4`], float f[`4`]) __attribute__((alias ("half_to_float4_c")));
249	void float_to_half (const float f, guint16 h, int n) __attribute__((alias ("float_to_half_c")));
250	void half_to_float (const guint16 h, float* f, int* n) __attribute__((alias ("half_to_float_c")));
251
252	#endif
253
254	#endif /* HAVE_F16C */
255

source code of gtk/gsk/gl/fp16.c