1 | // SPDX-License-Identifier: Apache-2.0 |
2 | // ---------------------------------------------------------------------------- |
3 | // Copyright 2011-2020 Arm Limited |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | // use this file except in compliance with the License. You may obtain a copy |
7 | // of the License at: |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, software |
12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | // License for the specific language governing permissions and limitations |
15 | // under the License. |
16 | // ---------------------------------------------------------------------------- |
17 | |
18 | /* |
19 | * This module implements a variety of mathematical data types and library |
20 | * functions used by the codec. |
21 | */ |
22 | |
23 | #ifndef ASTC_MATHLIB_H_INCLUDED |
24 | #define ASTC_MATHLIB_H_INCLUDED |
25 | |
26 | #include <cmath> |
27 | #include <cstdint> |
28 | |
// Fallback definition of pi, used only when no M_PI macro is already defined
// after including <cmath>; an existing definition is left untouched.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
32 | |
33 | /* ============================================================================ |
34 | Fast math library; note that many of the higher-order functions in this set |
35 | use approximations which are less accurate, but faster, than <cmath> standard |
36 | library equivalents. |
37 | |
38 | Note: Many of these are not necessarily faster than simple C versions when |
39 | used on a single scalar value, but are included for testing purposes as most |
40 | have an option based on SSE intrinsics and therefore provide an obvious route |
41 | to future vectorization. |
42 | ============================================================================ */ |
43 | |
// We support scalar versions of many maths functions which use SSE intrinsics
// as an "optimized" path, using just one lane from the SIMD hardware. In
// reality these are often slower than standard C due to setup and scheduling
// overheads, and the fact that we're not offsetting that cost with any actual
// vectorization.
//
// These variants are only included as a means to test that the accuracy of an
// SSE implementation would be acceptable before refactoring code paths to use
// an actual vectorized implementation which gets some advantage from SSE. It
// is therefore expected that the code will go *slower* with this macro
// set to 1 ...
//
// The value defined here is 0. Note that the definition is not wrapped in an
// #ifndef guard, so a value supplied on the compiler command line would
// conflict with this line rather than override it; edit this line to change
// the setting.
#define USE_SCALAR_SSE 0
56 | |
// Wrapped in a namespace so that these names cannot collide with the C
// standard library functions of the same name.
namespace astc
{

/**
 * @brief Check whether a float value is a NaN.
 *
 * The check relies on a NaN comparing unequal to itself.
 *
 * @param val The value to test.
 *
 * @return Zero if @c val is not a NaN, non-zero otherwise.
 */
static inline int isnan(float val)
{
    const bool differs_from_itself = (val != val);
    return differs_from_itself ? 1 : 0;
}

/**
 * @brief Set up the state structure of a random number generator.
 *
 * Important note: The codec wants sets of random numbers, but the seed value
 * must be identical across instances and threads so that image output stays
 * stable across compressor runs and across platforms. Every PRNG created by
 * this call will therefore return the same sequence of values.
 *
 * @param state The state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Produce the next random number from the generator.
 *
 * The generator is an implementation of the "xoroshiro-128+ 1.0" PRNG, based
 * on the public-domain implementation given by David Blackman & Sebastiano
 * Vigna at http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update.
 *
 * @return The next random number in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}
98 | |
99 | /* ============================================================================ |
100 | Utility vector template classes with basic operations |
101 | ============================================================================ */ |
102 | |
/**
 * @brief Simple four-component vector with public x, y, z and w members.
 *
 * Copy construction, copy assignment and destruction are deliberately left to
 * the compiler (Rule of Zero). The generated operations perform the same
 * member-wise copy as hand-written ones would, but keep the type trivially
 * copyable whenever T is, so values can be copied and passed around cheaply.
 *
 * @tparam T The type of each component.
 */
template <typename T> class vtype4
{
public:
    T x, y, z, w;

    /**
     * @brief Default constructor.
     *
     * The components are default-initialized, so for built-in T they hold
     * indeterminate values until they are assigned.
     */
    vtype4() {}

    /**
     * @brief Construct a vector from four explicit component values.
     *
     * @param p The value for x.
     * @param q The value for y.
     * @param r The value for z.
     * @param s The value for w.
     */
    vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {}
};
118 | |
119 | typedef vtype4<int> int4; |
120 | typedef vtype4<unsigned int> uint4; |
121 | |
122 | static inline int4 operator+(int4 p, int4 q) { return int4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
123 | static inline uint4 operator+(uint4 p, uint4 q) { return uint4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
124 | |
125 | static inline int4 operator-(int4 p, int4 q) { return int4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
126 | static inline uint4 operator-(uint4 p, uint4 q) { return uint4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
127 | |
128 | static inline int4 operator*(int4 p, int4 q) { return int4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
129 | static inline uint4 operator*(uint4 p, uint4 q) { return uint4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
130 | |
131 | static inline int4 operator*(int4 p, int q) { return int4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
132 | static inline uint4 operator*(uint4 p, uint32_t q) { return uint4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
133 | |
134 | static inline int4 operator*(int p, int4 q) { return q * p; } |
135 | static inline uint4 operator*(uint32_t p, uint4 q) { return q * p; } |
136 | |
// Smaller of two values; yields y when the operands compare equal.
// This is a function-like macro: the selected argument is evaluated twice, so
// do not pass expressions with side effects. It is only defined when no MIN
// macro already exists.
#if !defined(MIN)
    #define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

// Larger of two values; yields y when the operands compare equal.
// The same double-evaluation caveat applies. It is only defined when no MAX
// macro already exists.
#if !defined(MAX)
    #define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
144 | |
145 | /* ============================================================================ |
146 | Softfloat library with fp32 and fp16 conversion functionality. |
147 | ============================================================================ */ |
/**
 * @brief Union overlaying a 32-bit unsigned integer, a 32-bit signed integer
 * and a float in the same storage.
 *
 * NOTE(review): presumably used to reach the bit pattern of a float, which
 * assumes float is 32 bits wide - confirm against the users of this type.
 */
union if32_
{
    uint32_t u;
    int32_t s;
    float f;
};

typedef union if32_ if32;
154 | |
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably returns the number of leading zero bits in p,
// judging by the name - confirm against the implementation.
uint32_t clz32(uint32_t p);
156 | |
/* Sized soft-float types. These are aliases of the fixed-width integer types
   rather than of the language's floating-point types, because the library
   needs to maintain exact, bit-level control over every operation on these
   data types. */
using sf16 = uint16_t;
using sf32 = uint32_t;

/* Widening float->float conversions (declarations only). */
sf32 sf16_to_sf32(sf16);

float sf16_to_float(sf16);
168 | |
169 | #endif |
170 | |