fp16i.c source code [gtk/gsk/gl/fp16i.c]

1	/* fp16i.c
2	*
3	* Copyright 2021 Red Hat, Inc.
4	*
5	* This library is free software; you can redistribute it and/or
6	* modify it under the terms of the GNU Lesser General Public
7	* License as published by the Free Software Foundation; either
8	* version 2.1 of the License, or (at your option) any later version.
9	*
10	* This library is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	* Lesser General Public License for more details.
14	*
15	* You should have received a copy of the GNU Lesser General Public
16	* License along with this program. If not, see <http://www.gnu.org/licenses/>.
17	*
18	* SPDX-License-Identifier: LGPL-2.1-or-later
19	*/
20
21	#include "config.h"
22
23	#include "fp16private.h"
24
25	#ifdef HAVE_F16C
26	#include <immintrin.h>
27
/* CAST_M128I_P:
 * Casts the pointer expression @a to the const vector-pointer type that
 * this file passes to _mm_loadl_epi64().
 *
 * MSVC (when not used through clang) gets a plain __m128i pointer; every
 * other compiler gets __m128i_u, presumably because that unaligned
 * variant of the type is only provided by GCC/Clang style headers.
 *
 * The argument and the whole expansion are parenthesized so that an
 * argument such as `p + 1` is evaluated before the cast is applied.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#define CAST_M128I_P(a) ((__m128i const *) (a))
#else
#define CAST_M128I_P(a) ((__m128i_u const *) (a))
#endif
33	void
34	float_to_half4_f16c (const float f[`4`],
35	guint16 h[`4`])
36	{
37	__m128 s = _mm_loadu_ps (f);
38	__m128i i = _mm_cvtps_ph (s, `0`);
39	_mm_storel_epi64 ((__m128i*)h, i);
40	}
41
42	void
43	half_to_float4_f16c (const guint16 h[`4`],
44	float f[`4`])
45	{
46	__m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
47	__m128 s = _mm_cvtph_ps (i);
48
49	_mm_store_ps (f, s);
50	}
51
/* ALIGNED:
 * Evaluates to non-zero when the address held in pointer @p is a
 * multiple of @n. Both arguments and the whole expansion are
 * parenthesized so the macro composes safely with other expressions.
 *
 * NOTE(review): GPOINTER_TO_UINT may keep only the low bits of the
 * pointer on 64-bit targets; that does not matter for the modulus of 16
 * used in this file.
 */
#define ALIGNED(p, n) ((GPOINTER_TO_UINT (p) % (n)) == 0)
53	void
54	float_to_half_f16c (const float *f,
55	guint16 *h,
56	int n)
57	{
58	__m128 s;
59	__m128i i;
60	int j;
61	const float *ff = f;
62	guint16 *hh = h;
63
64	for (j = `0`; j < n; j++)
65	{
66	if (ALIGNED (ff, `16`) && ALIGNED (hh, `16`))
67	break;
68	ff++;
69	hh++;
70	}
71
72	float_to_half_c (f, h, j);
73
74	for (; j + `4` < n; j += `4`)
75	{
76	s = _mm_loadu_ps (ff);
77	i = _mm_cvtps_ph (s, `0`);
78	_mm_storel_epi64 ((__m128i*)hh, i);
79	ff += `4`;
80	hh += `4`;
81	}
82
83	if (j < n)
84	float_to_half_c (ff, hh, n - j);
85	}
86
87	void
88	half_to_float_f16c (const guint16 *h,
89	float *f,
90	int n)
91	{
92	__m128i i;
93	__m128 s;
94	int j;
95	const guint16 *hh = h;
96	float *ff = f;
97
98	for (j = `0`; j < n; j++)
99	{
100	if (ALIGNED (ff, `16`) && ALIGNED (hh, `16`))
101	break;
102	ff++;
103	hh++;
104	}
105
106	half_to_float_c (h, f, j);
107
108	for (; j + `4` < n; j += `4`)
109	{
110	i = _mm_loadl_epi64 (CAST_M128I_P (hh));
111	s = _mm_cvtph_ps (i);
112	_mm_store_ps (ff, s);
113	hh += `4`;
114	ff += `4`;
115	}
116
117	if (j < n)
118	half_to_float_c (hh, ff, n - j);
119	}
120
121	#endif /* HAVE_F16C */
122
123

source code of gtk/gsk/gl/fp16i.c