1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #ifndef QCOLORTRCLUT_P_H |
5 | #define QCOLORTRCLUT_P_H |
6 | |
7 | // |
8 | // W A R N I N G |
9 | // ------------- |
10 | // |
11 | // This file is not part of the Qt API. It exists purely as an |
12 | // implementation detail. This header file may change from version to |
13 | // version without notice, or even be removed. |
14 | // |
15 | // We mean it. |
16 | // |
17 | |
18 | #include <QtGui/private/qtguiglobal_p.h> |
19 | #include <QtGui/qrgb.h> |
20 | #include <QtGui/qrgba64.h> |
21 | #include <QtCore/private/qsimd_p.h> |
22 | |
23 | #include <cmath> |
24 | #include <memory> |
25 | |
26 | #if defined(__SSE2__) |
27 | #include <emmintrin.h> |
28 | #elif defined(__ARM_NEON__) |
29 | #include <arm_neon.h> |
30 | #endif |
31 | |
32 | QT_BEGIN_NAMESPACE |
33 | |
34 | class QColorTransferGenericFunction; |
35 | class QColorTransferFunction; |
36 | class QColorTransferTable; |
37 | class QColorTrc; |
38 | |
39 | class Q_GUI_EXPORT QColorTrcLut |
40 | { |
41 | public: |
42 | static constexpr uint32_t ShiftUp = 4; // Amount to shift up from 1->255 |
43 | static constexpr uint32_t ShiftDown = (8 - ShiftUp); // Amount to shift down from 1->65280 |
44 | static constexpr qsizetype Resolution = (1 << ShiftUp) * 255; // Number of entries in table |
45 | |
46 | enum Direction { |
47 | ToLinear = 1, |
48 | FromLinear = 2, |
49 | BiLinear = ToLinear | FromLinear |
50 | }; |
51 | |
52 | static std::shared_ptr<QColorTrcLut> fromGamma(float gamma, Direction dir = BiLinear); |
53 | static std::shared_ptr<QColorTrcLut> fromTrc(const QColorTrc &trc, Direction dir = BiLinear); |
54 | void setFromGamma(float gamma, Direction dir = BiLinear); |
55 | void setFromTransferFunction(const QColorTransferFunction &transFn, Direction dir = BiLinear); |
56 | void setFromTransferTable(const QColorTransferTable &transTable, Direction dir = BiLinear); |
57 | void setFromTransferGenericFunction(const QColorTransferGenericFunction &transfn, Direction dir); |
58 | void setFromTrc(const QColorTrc &trc, Direction dir); |
59 | |
60 | // The following methods all convert opaque or unpremultiplied colors: |
61 | |
62 | QRgba64 toLinear64(QRgb rgb32) const |
63 | { |
64 | #if defined(__SSE2__) |
65 | __m128i v = _mm_cvtsi32_si128(a: rgb32); |
66 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
67 | const __m128i vidx = _mm_slli_epi16(a: v, count: ShiftUp); |
68 | const int ridx = _mm_extract_epi16(vidx, 2); |
69 | const int gidx = _mm_extract_epi16(vidx, 1); |
70 | const int bidx = _mm_extract_epi16(vidx, 0); |
71 | v = _mm_slli_epi16(a: v, count: 8); // a * 256 |
72 | v = _mm_insert_epi16(v, m_toLinear[ridx], 0); |
73 | v = _mm_insert_epi16(v, m_toLinear[gidx], 1); |
74 | v = _mm_insert_epi16(v, m_toLinear[bidx], 2); |
75 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
76 | QRgba64 rgba64; |
77 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
78 | return rgba64; |
79 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
80 | uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32)); |
81 | uint16x4_t v16 = vget_low_u16(vmovl_u8(v8)); |
82 | const uint16x4_t vidx = vshl_n_u16(v16, ShiftUp); |
83 | const int ridx = vget_lane_u16(vidx, 2); |
84 | const int gidx = vget_lane_u16(vidx, 1); |
85 | const int bidx = vget_lane_u16(vidx, 0); |
86 | v16 = vshl_n_u16(v16, 8); // a * 256 |
87 | v16 = vset_lane_u16(m_toLinear[ridx], v16, 0); |
88 | v16 = vset_lane_u16(m_toLinear[gidx], v16, 1); |
89 | v16 = vset_lane_u16(m_toLinear[bidx], v16, 2); |
90 | v16 = vadd_u16(v16, vshr_n_u16(v16, 8)); |
91 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0)); |
92 | #else |
93 | uint r = m_toLinear[qRed(rgb32) << ShiftUp]; |
94 | uint g = m_toLinear[qGreen(rgb32) << ShiftUp]; |
95 | uint b = m_toLinear[qBlue(rgb32) << ShiftUp]; |
96 | r = r + (r >> 8); |
97 | g = g + (g >> 8); |
98 | b = b + (b >> 8); |
99 | return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); |
100 | #endif |
101 | } |
102 | QRgba64 toLinear64(QRgba64) const = delete; |
103 | |
104 | QRgb toLinear(QRgb rgb32) const |
105 | { |
106 | return convertWithTable(rgb32, table: m_toLinear.get()); |
107 | } |
108 | |
109 | QRgba64 toLinear(QRgba64 rgb64) const |
110 | { |
111 | return convertWithTable(rgb64, table: m_toLinear.get()); |
112 | } |
113 | |
114 | float u8ToLinearF32(int c) const |
115 | { |
116 | ushort v = m_toLinear[c << ShiftUp]; |
117 | return v * (1.0f / (255*256)); |
118 | } |
119 | |
120 | float u16ToLinearF32(int c) const |
121 | { |
122 | c -= (c >> 8); |
123 | ushort v = m_toLinear[c >> ShiftDown]; |
124 | return v * (1.0f / (255*256)); |
125 | } |
126 | |
127 | float toLinear(float f) const |
128 | { |
129 | ushort v = m_toLinear[(int)(f * Resolution + 0.5f)]; |
130 | return v * (1.0f / (255*256)); |
131 | } |
132 | |
133 | QRgb fromLinear64(QRgba64 rgb64) const |
134 | { |
135 | #if defined(__SSE2__) |
136 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
137 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
138 | const __m128i vidx = _mm_srli_epi16(a: v, count: ShiftDown); |
139 | const int ridx = _mm_extract_epi16(vidx, 0); |
140 | const int gidx = _mm_extract_epi16(vidx, 1); |
141 | const int bidx = _mm_extract_epi16(vidx, 2); |
142 | v = _mm_insert_epi16(v, m_fromLinear[ridx], 2); |
143 | v = _mm_insert_epi16(v, m_fromLinear[gidx], 1); |
144 | v = _mm_insert_epi16(v, m_fromLinear[bidx], 0); |
145 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
146 | v = _mm_srli_epi16(a: v, count: 8); |
147 | v = _mm_packus_epi16(a: v, b: v); |
148 | return _mm_cvtsi128_si32(a: v); |
149 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
150 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
151 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
152 | const uint16x4_t vidx = vshr_n_u16(v, ShiftDown); |
153 | const int ridx = vget_lane_u16(vidx, 0); |
154 | const int gidx = vget_lane_u16(vidx, 1); |
155 | const int bidx = vget_lane_u16(vidx, 2); |
156 | v = vset_lane_u16(m_fromLinear[ridx], v, 2); |
157 | v = vset_lane_u16(m_fromLinear[gidx], v, 1); |
158 | v = vset_lane_u16(m_fromLinear[bidx], v, 0); |
159 | uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8); |
160 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
161 | #else |
162 | uint a = rgb64.alpha(); |
163 | uint r = rgb64.red(); |
164 | uint g = rgb64.green(); |
165 | uint b = rgb64.blue(); |
166 | a = a - (a >> 8); |
167 | r = r - (r >> 8); |
168 | g = g - (g >> 8); |
169 | b = b - (b >> 8); |
170 | a = (a + 0x80) >> 8; |
171 | r = (m_fromLinear[r >> ShiftDown] + 0x80) >> 8; |
172 | g = (m_fromLinear[g >> ShiftDown] + 0x80) >> 8; |
173 | b = (m_fromLinear[b >> ShiftDown] + 0x80) >> 8; |
174 | return (a << 24) | (r << 16) | (g << 8) | b; |
175 | #endif |
176 | } |
177 | |
178 | QRgb fromLinear(QRgb rgb32) const |
179 | { |
180 | return convertWithTable(rgb32, table: m_fromLinear.get()); |
181 | } |
182 | |
183 | QRgba64 fromLinear(QRgba64 rgb64) const |
184 | { |
185 | return convertWithTable(rgb64, table: m_fromLinear.get()); |
186 | } |
187 | |
188 | int u8FromLinearF32(float f) const |
189 | { |
190 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
191 | return (v + 0x80) >> 8; |
192 | } |
193 | int u16FromLinearF32(float f) const |
194 | { |
195 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
196 | return v + (v >> 8); |
197 | } |
198 | float fromLinear(float f) const |
199 | { |
200 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
201 | return v * (1.0f / (255*256)); |
202 | } |
203 | |
204 | // We translate to 0-65280 (255*256) instead to 0-65535 to make simple |
205 | // shifting an accurate conversion. |
206 | // We translate from 0->Resolution (4080 = 255*16) for the same speed up, |
207 | // and to keep the tables small enough to fit in most inner caches. |
208 | std::unique_ptr<ushort[]> m_toLinear; // [0->Resolution] -> [0-65280] |
209 | std::unique_ptr<ushort[]> m_fromLinear; // [0->Resolution] -> [0-65280] |
210 | ushort m_unclampedToLinear = Resolution; |
211 | |
212 | private: |
213 | QColorTrcLut() = default; |
214 | |
215 | static std::shared_ptr<QColorTrcLut> create(); |
216 | |
217 | Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table) |
218 | { |
219 | const int r = (table[qRed(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
220 | const int g = (table[qGreen(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
221 | const int b = (table[qBlue(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
222 | return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b; |
223 | } |
224 | Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table) |
225 | { |
226 | #if defined(__SSE2__) |
227 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
228 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
229 | const __m128i vidx = _mm_srli_epi16(a: v, count: ShiftDown); |
230 | const int ridx = _mm_extract_epi16(vidx, 2); |
231 | const int gidx = _mm_extract_epi16(vidx, 1); |
232 | const int bidx = _mm_extract_epi16(vidx, 0); |
233 | v = _mm_insert_epi16(v, table[ridx], 2); |
234 | v = _mm_insert_epi16(v, table[gidx], 1); |
235 | v = _mm_insert_epi16(v, table[bidx], 0); |
236 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
237 | QRgba64 rgba64; |
238 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
239 | return rgba64; |
240 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
241 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
242 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
243 | const uint16x4_t vidx = vshr_n_u16(v, ShiftDown); |
244 | const int ridx = vget_lane_u16(vidx, 2); |
245 | const int gidx = vget_lane_u16(vidx, 1); |
246 | const int bidx = vget_lane_u16(vidx, 0); |
247 | v = vset_lane_u16(table[ridx], v, 2); |
248 | v = vset_lane_u16(table[gidx], v, 1); |
249 | v = vset_lane_u16(table[bidx], v, 0); |
250 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
251 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0)); |
252 | #else |
253 | ushort r = rgb64.red(); |
254 | ushort g = rgb64.green(); |
255 | ushort b = rgb64.blue(); |
256 | r = r - (r >> 8); |
257 | g = g - (g >> 8); |
258 | b = b - (b >> 8); |
259 | r = table[r >> ShiftDown]; |
260 | g = table[g >> ShiftDown]; |
261 | b = table[b >> ShiftDown]; |
262 | r = r + (r >> 8); |
263 | g = g + (g >> 8); |
264 | b = b + (b >> 8); |
265 | return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); |
266 | #endif |
267 | } |
268 | }; |
269 | |
270 | QT_END_NAMESPACE |
271 | |
272 | #endif // QCOLORTRCLUT_P_H |
273 | |