1/* fp16i.c
2 *
3 * Copyright 2021 Red Hat, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * SPDX-License-Identifier: LGPL-2.1-or-later
19 */
20
21#include "config.h"
22
23#include "fp16private.h"
24
25#ifdef HAVE_F16C
26#include <immintrin.h>
27
/* Cast a pointer to the pointer type passed to _mm_loadl_epi64() in this
 * file.  MSVC (when not driven by clang) gets a plain __m128i pointer;
 * every other compiler gets the __m128i_u pointer type.
 *
 * Both the argument and the whole expansion are parenthesised so the cast
 * applies to the complete argument expression and the result can be used
 * safely inside a larger expression.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#define CAST_M128I_P(a) ((__m128i const *) (a))
#else
#define CAST_M128I_P(a) ((__m128i_u const *) (a))
#endif
33void
34float_to_half4_f16c (const float f[4],
35 guint16 h[4])
36{
37 __m128 s = _mm_loadu_ps (f);
38 __m128i i = _mm_cvtps_ph (s, 0);
39 _mm_storel_epi64 ((__m128i*)h, i);
40}
41
42void
43half_to_float4_f16c (const guint16 h[4],
44 float f[4])
45{
46 __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
47 __m128 s = _mm_cvtph_ps (i);
48
49 _mm_store_ps (f, s);
50}
51
/* Evaluates to non-zero when pointer p is a multiple of n bytes.
 * Arguments are parenthesised so that expressions such as
 * ALIGNED (ptr, a + b) are evaluated as intended. */
#define ALIGNED(p, n) ((GPOINTER_TO_UINT (p) % (n)) == 0)
53void
54float_to_half_f16c (const float *f,
55 guint16 *h,
56 int n)
57{
58 __m128 s;
59 __m128i i;
60 int j;
61 const float *ff = f;
62 guint16 *hh = h;
63
64 for (j = 0; j < n; j++)
65 {
66 if (ALIGNED (ff, 16) && ALIGNED (hh, 16))
67 break;
68 ff++;
69 hh++;
70 }
71
72 float_to_half_c (f, h, j);
73
74 for (; j + 4 < n; j += 4)
75 {
76 s = _mm_loadu_ps (ff);
77 i = _mm_cvtps_ph (s, 0);
78 _mm_storel_epi64 ((__m128i*)hh, i);
79 ff += 4;
80 hh += 4;
81 }
82
83 if (j < n)
84 float_to_half_c (ff, hh, n - j);
85}
86
87void
88half_to_float_f16c (const guint16 *h,
89 float *f,
90 int n)
91{
92 __m128i i;
93 __m128 s;
94 int j;
95 const guint16 *hh = h;
96 float *ff = f;
97
98 for (j = 0; j < n; j++)
99 {
100 if (ALIGNED (ff, 16) && ALIGNED (hh, 16))
101 break;
102 ff++;
103 hh++;
104 }
105
106 half_to_float_c (h, f, j);
107
108 for (; j + 4 < n; j += 4)
109 {
110 i = _mm_loadl_epi64 (CAST_M128I_P (hh));
111 s = _mm_cvtph_ps (i);
112 _mm_store_ps (ff, s);
113 hh += 4;
114 ff += 4;
115 }
116
117 if (j < n)
118 half_to_float_c (hh, ff, n - j);
119}
120
121#endif /* HAVE_F16C */
122
123

source code of gtk/gsk/gl/fp16i.c