Half.hpp source code [flutter_engine/third_party/swiftshader/src/System/Half.hpp]

1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#ifndef sw_Half_hpp
16	#define sw_Half_hpp
17
#include "Math.hpp"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
22
23	namespace sw {
24
// 16-bit value type that converts to and from float.
// Only declarations appear here; the conversion routines are defined elsewhere.
// NOTE(review): presumably the stored bits are an IEEE 754 binary16 pattern;
// confirm against the out-of-line definitions.
class half
{
	// Raw 16-bit storage. Deliberately has no initializer so that default
	// construction stays trivial and leaves the value indeterminate.
	unsigned short fp16i;

public:
	// Trivial default constructor: the stored bits are left uninitialized.
	half() = default;

	// Builds a half from a float. Explicit, so floats never convert silently.
	explicit half(float f);

	// Assigns from a float and returns *this.
	half &operator=(float f);

	// Implicit conversion back to float.
	operator float() const;
};
38
39	inline half shortAsHalf(short s)
40	{
41	union
42	{
43	half h;
44	short s;
45	} hs;
46
47	hs.s = s;
48
49	return hs.h;
50	}
51
52	class RGB9E5
53	{
54	union
55	{
56	struct
57	{
58	unsigned int R : `9`;
59	unsigned int G : `9`;
60	unsigned int B : `9`;
61	unsigned int E : `5`;
62	};
63	uint32_t packed;
64	};
65
66	public:
67	RGB9E5(const float rgb[`3`])
68	: RGB9E5 (rgb[`0`], rgb[`1`], rgb[`2`])
69	{
70	}
71
72	RGB9E5(float r, float g, float b)
73	{
74	// Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
75
76	// B is the exponent bias (15)
77	constexpr int g_sharedexp_bias = `15`;
78
79	// N is the number of mantissa bits per component (9)
80	constexpr int g_sharedexp_mantissabits = `9`;
81
82	// Emax is the maximum allowed biased exponent value (31)
83	constexpr int g_sharedexp_maxexponent = `31`;
84
85	constexpr float g_sharedexp_max =
86	((static_cast<float>(`1` << g_sharedexp_mantissabits) - `1`) /
87	static_cast<float>(`1` << g_sharedexp_mantissabits)) *
88	static_cast<float>(`1` << (g_sharedexp_maxexponent - g_sharedexp_bias));
89
90	// Clamp components to valid range. NaN becomes 0.
91	const float red_c = std::min(a: !(r > `0`) ? `0` : r, b: g_sharedexp_max);
92	const float green_c = std::min(a: !(g > `0`) ? `0` : g, b: g_sharedexp_max);
93	const float blue_c = std::min(a: !(b > `0`) ? `0` : b, b: g_sharedexp_max);
94
95	// We're reducing the mantissa to 9 bits, so we must round up if the next
96	// bit is 1. In other words add 0.5 to the new mantissa's position and
97	// allow overflow into the exponent so we can scale correctly.
98	constexpr int half = `1` << (`23` - g_sharedexp_mantissabits);
99	const float red_r = bit_cast<float>(source: bit_cast<int>(source: red_c) + half);
100	const float green_r = bit_cast<float>(source: bit_cast<int>(source: green_c) + half);
101	const float blue_r = bit_cast<float>(source: bit_cast<int>(source: blue_c) + half);
102
103	// The largest component determines the shared exponent. It can't be lower
104	// than 0 (after bias subtraction) so also limit to the mimimum representable.
105	constexpr float min_s = `0.5f` / (`1` << g_sharedexp_bias);
106	float max_s = std::max(a: std::max(a: red_r, b: green_r), b: std::max(a: blue_r, b: min_s));
107
108	// Obtain the reciprocal of the shared exponent by inverting the bits,
109	// and scale by the new mantissa's size. Note that the IEEE-754 single-precision
110	// format has an implicit leading 1, but this shared component format does not.
111	float scale = bit_cast<float>(source: (bit_cast<int>(source: max_s) & `0x7F800000`) ^ `0x7F800000`) * (`1` << (g_sharedexp_mantissabits - `2`));
112
113	R = static_cast<unsigned int>(round(lcpp_x: red_c * scale));
114	G = static_cast<unsigned int>(round(lcpp_x: green_c * scale));
115	B = static_cast<unsigned int>(round(lcpp_x: blue_c * scale));
116	E = (bit_cast<unsigned int>(source: max_s) >> `23`) - `127` + `15` + `1`;
117	}
118
119	operator unsigned int() const
120	{
121	return packed;
122	}
123
124	void toRGB16F(half rgb[`3`]) const
125	{
126	constexpr int offset = `24`; // Exponent bias (15) + number of mantissa bits per component (9) = 24
127
128	const float factor = (`1u` << E) * (`1.0f` / (`1` << offset));
129	rgb[`0`] = half (R * factor);
130	rgb[`1`] = half (G * factor);
131	rgb[`2`] = half (B * factor);
132	}
133	};
134
135	class R11G11B10F
136	{
137	union
138	{
139	struct
140	{
141	unsigned int R : `11`;
142	unsigned int G : `11`;
143	unsigned int B : `10`;
144	};
145	uint32_t packed;
146	};
147
148	public:
149	R11G11B10F(const float rgb[`3`])
150	{
151	R = float32ToFloat11(fp32: rgb[`0`]);
152	G = float32ToFloat11(fp32: rgb[`1`]);
153	B = float32ToFloat10(fp32: rgb[`2`]);
154	}
155
156	operator unsigned int() const
157	{
158	return packed;
159	}
160
161	void toRGB16F(half rgb[`3`]) const
162	{
163	rgb[`0`] = float11ToFloat16(fp11: R);
164	rgb[`1`] = float11ToFloat16(fp11: G);
165	rgb[`2`] = float10ToFloat16(fp10: B);
166	}
167
168	static inline half float11ToFloat16(unsigned short fp11)
169	{
170	return shortAsHalf(s: fp11 << `4`); // Sign bit 0
171	}
172
173	static inline half float10ToFloat16(unsigned short fp10)
174	{
175	return shortAsHalf(s: fp10 << `5`); // Sign bit 0
176	}
177
178	static inline unsigned short float32ToFloat11(float fp32)
179	{
180	const unsigned int float32MantissaMask = `0x7FFFFF`;
181	const unsigned int float32ExponentMask = `0x7F800000`;
182	const unsigned int float32SignMask = `0x80000000`;
183	const unsigned int float32ValueMask = ~float32SignMask;
184	const unsigned int float32ExponentFirstBit = `23`;
185	const unsigned int float32ExponentBias = `127`;
186
187	const unsigned short float11Max = `0x7BF`;
188	const unsigned short float11MantissaMask = `0x3F`;
189	const unsigned short float11ExponentMask = `0x7C0`;
190	const unsigned short float11BitMask = `0x7FF`;
191	const unsigned int float11ExponentBias = `14`;
192
193	const unsigned int float32Maxfloat11 = `0x477E0000`;
194	const unsigned int float32MinNormfloat11 = `0x38800000`;
195	const unsigned int float32MinDenormfloat11 = `0x35000080`;
196
197	const unsigned int float32Bits = bit_cast<unsigned int>(source: fp32);
198	const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
199
200	unsigned int float32Val = float32Bits & float32ValueMask;
201
202	if((float32Val & float32ExponentMask) == float32ExponentMask)
203	{
204	// INF or NAN
205	if((float32Val & float32MantissaMask) != `0`)
206	{
207	return float11ExponentMask \|
208	(((float32Val >> `17`) \| (float32Val >> `11`) \| (float32Val >> `6`) \| (float32Val)) &
209	float11MantissaMask);
210	}
211	else if(float32Sign)
212	{
213	// -INF is clamped to 0 since float11 is positive only
214	return `0`;
215	}
216	else
217	{
218	return float11ExponentMask;
219	}
220	}
221	else if(float32Sign)
222	{
223	// float11 is positive only, so clamp to zero
224	return `0`;
225	}
226	else if(float32Val > float32Maxfloat11)
227	{
228	// The number is too large to be represented as a float11, set to max
229	return float11Max;
230	}
231	else if(float32Val < float32MinDenormfloat11)
232	{
233	// The number is too small to be represented as a denormalized float11, set to 0
234	return `0`;
235	}
236	else
237	{
238	if(float32Val < float32MinNormfloat11)
239	{
240	// The number is too small to be represented as a normalized float11
241	// Convert it to a denormalized value.
242	const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
243	(float32Val >> float32ExponentFirstBit);
244	float32Val =
245	((`1` << float32ExponentFirstBit) \| (float32Val & float32MantissaMask)) >> shift;
246	}
247	else
248	{
249	// Rebias the exponent to represent the value as a normalized float11
250	float32Val += `0xC8000000`;
251	}
252
253	return ((float32Val + `0xFFFF` + ((float32Val >> `17`) & `1`)) >> `17`) & float11BitMask;
254	}
255	}
256
257	static inline unsigned short float32ToFloat10(float fp32)
258	{
259	const unsigned int float32MantissaMask = `0x7FFFFF`;
260	const unsigned int float32ExponentMask = `0x7F800000`;
261	const unsigned int float32SignMask = `0x80000000`;
262	const unsigned int float32ValueMask = ~float32SignMask;
263	const unsigned int float32ExponentFirstBit = `23`;
264	const unsigned int float32ExponentBias = `127`;
265
266	const unsigned short float10Max = `0x3DF`;
267	const unsigned short float10MantissaMask = `0x1F`;
268	const unsigned short float10ExponentMask = `0x3E0`;
269	const unsigned short float10BitMask = `0x3FF`;
270	const unsigned int float10ExponentBias = `14`;
271
272	const unsigned int float32Maxfloat10 = `0x477C0000`;
273	const unsigned int float32MinNormfloat10 = `0x38800000`;
274	const unsigned int float32MinDenormfloat10 = `0x35800040`;
275
276	const unsigned int float32Bits = bit_cast<unsigned int>(source: fp32);
277	const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
278
279	unsigned int float32Val = float32Bits & float32ValueMask;
280
281	if((float32Val & float32ExponentMask) == float32ExponentMask)
282	{
283	// INF or NAN
284	if((float32Val & float32MantissaMask) != `0`)
285	{
286	return float10ExponentMask \|
287	(((float32Val >> `18`) \| (float32Val >> `13`) \| (float32Val >> `3`) \| (float32Val)) &
288	float10MantissaMask);
289	}
290	else if(float32Sign)
291	{
292	// -INF is clamped to 0 since float10 is positive only
293	return `0`;
294	}
295	else
296	{
297	return float10ExponentMask;
298	}
299	}
300	else if(float32Sign)
301	{
302	// float10 is positive only, so clamp to zero
303	return `0`;
304	}
305	else if(float32Val > float32Maxfloat10)
306	{
307	// The number is too large to be represented as a float10, set to max
308	return float10Max;
309	}
310	else if(float32Val < float32MinDenormfloat10)
311	{
312	// The number is too small to be represented as a denormalized float10, set to 0
313	return `0`;
314	}
315	else
316	{
317	if(float32Val < float32MinNormfloat10)
318	{
319	// The number is too small to be represented as a normalized float10
320	// Convert it to a denormalized value.
321	const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
322	(float32Val >> float32ExponentFirstBit);
323	float32Val =
324	((`1` << float32ExponentFirstBit) \| (float32Val & float32MantissaMask)) >> shift;
325	}
326	else
327	{
328	// Rebias the exponent to represent the value as a normalized float10
329	float32Val += `0xC8000000`;
330	}
331
332	return ((float32Val + `0x1FFFF` + ((float32Val >> `18`) & `1`)) >> `18`) & float10BitMask;
333	}
334	}
335	};
336
337	} // namespace sw
338
339	#endif // sw_Half_hpp
340

Provided by KDAB

Learn more about Flutter for embedded and desktop on industrialflutter.com

Definitions

source code of flutter_engine/third_party/swiftshader/src/System/Half.hpp