1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtGui module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #ifndef QCOLORTRCLUT_P_H |
41 | #define QCOLORTRCLUT_P_H |
42 | |
43 | // |
44 | // W A R N I N G |
45 | // ------------- |
46 | // |
47 | // This file is not part of the Qt API. It exists purely as an |
48 | // implementation detail. This header file may change from version to |
49 | // version without notice, or even be removed. |
50 | // |
51 | // We mean it. |
52 | // |
53 | |
54 | #include <QtGui/private/qtguiglobal_p.h> |
55 | #include <QtCore/qsharedpointer.h> |
56 | #include <QtGui/qrgb.h> |
57 | #include <QtGui/qrgba64.h> |
58 | |
59 | #include <cmath> |
60 | |
61 | #if defined(__SSE2__) |
62 | #include <emmintrin.h> |
63 | #elif defined(__ARM_NEON__) || defined(__ARM_NEON) |
64 | #include <arm_neon.h> |
65 | #endif |
66 | |
67 | QT_BEGIN_NAMESPACE |
68 | |
69 | class QColorTransferFunction; |
70 | class QColorTransferTable; |
71 | |
72 | class Q_GUI_EXPORT QColorTrcLut : public QEnableSharedFromThis<QColorTrcLut> |
73 | { |
74 | public: |
75 | static QColorTrcLut *fromGamma(qreal gamma); |
76 | static QColorTrcLut *fromTransferFunction(const QColorTransferFunction &transfn); |
77 | static QColorTrcLut *fromTransferTable(const QColorTransferTable &transTable); |
78 | |
79 | // The following methods all convert opaque or unpremultiplied colors: |
80 | |
81 | QRgba64 toLinear64(QRgb rgb32) const |
82 | { |
83 | #if defined(__SSE2__) |
84 | __m128i v = _mm_cvtsi32_si128(a: rgb32); |
85 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
86 | const __m128i vidx = _mm_slli_epi16(a: v, count: 4); |
87 | const int ridx = _mm_extract_epi16(vidx, 2); |
88 | const int gidx = _mm_extract_epi16(vidx, 1); |
89 | const int bidx = _mm_extract_epi16(vidx, 0); |
90 | v = _mm_slli_epi16(a: v, count: 8); // a * 256 |
91 | v = _mm_insert_epi16(v, m_toLinear[ridx], 0); |
92 | v = _mm_insert_epi16(v, m_toLinear[gidx], 1); |
93 | v = _mm_insert_epi16(v, m_toLinear[bidx], 2); |
94 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
95 | QRgba64 rgba64; |
96 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
97 | return rgba64; |
98 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
99 | uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32)); |
100 | uint16x4_t v16 = vget_low_u16(vmovl_u8(v8)); |
101 | const uint16x4_t vidx = vshl_n_u16(v16, 4); |
102 | const int ridx = vget_lane_u16(vidx, 2); |
103 | const int gidx = vget_lane_u16(vidx, 1); |
104 | const int bidx = vget_lane_u16(vidx, 0); |
105 | v16 = vshl_n_u16(v16, 8); // a * 256 |
106 | v16 = vset_lane_u16(m_toLinear[ridx], v16, 0); |
107 | v16 = vset_lane_u16(m_toLinear[gidx], v16, 1); |
108 | v16 = vset_lane_u16(m_toLinear[bidx], v16, 2); |
109 | v16 = vadd_u16(v16, vshr_n_u16(v16, 8)); |
110 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0)); |
111 | #else |
112 | uint r = m_toLinear[qRed(rgb32) << 4]; |
113 | uint g = m_toLinear[qGreen(rgb32) << 4]; |
114 | uint b = m_toLinear[qBlue(rgb32) << 4]; |
115 | r = r + (r >> 8); |
116 | g = g + (g >> 8); |
117 | b = b + (b >> 8); |
118 | return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); |
119 | #endif |
120 | } |
121 | |
122 | QRgb toLinear(QRgb rgb32) const |
123 | { |
124 | return convertWithTable(rgb32, table: m_toLinear); |
125 | } |
126 | |
127 | QRgba64 toLinear(QRgba64 rgb64) const |
128 | { |
129 | return convertWithTable(rgb64, table: m_toLinear); |
130 | } |
131 | |
132 | float u8ToLinearF32(int c) const |
133 | { |
134 | ushort v = m_toLinear[c << 4]; |
135 | return v * (1.0f / (255*256)); |
136 | } |
137 | |
138 | float u16ToLinearF32(int c) const |
139 | { |
140 | c -= (c >> 8); |
141 | ushort v = m_toLinear[c >> 4]; |
142 | return v * (1.0f / (255*256)); |
143 | } |
144 | |
145 | float toLinear(float f) const |
146 | { |
147 | ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)]; |
148 | return v * (1.0f / (255*256)); |
149 | } |
150 | |
151 | QRgb fromLinear64(QRgba64 rgb64) const |
152 | { |
153 | #if defined(__SSE2__) |
154 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
155 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
156 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
157 | const int ridx = _mm_extract_epi16(vidx, 0); |
158 | const int gidx = _mm_extract_epi16(vidx, 1); |
159 | const int bidx = _mm_extract_epi16(vidx, 2); |
160 | v = _mm_insert_epi16(v, m_fromLinear[ridx], 2); |
161 | v = _mm_insert_epi16(v, m_fromLinear[gidx], 1); |
162 | v = _mm_insert_epi16(v, m_fromLinear[bidx], 0); |
163 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
164 | v = _mm_srli_epi16(a: v, count: 8); |
165 | v = _mm_packus_epi16(a: v, b: v); |
166 | return _mm_cvtsi128_si32(a: v); |
167 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
168 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
169 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
170 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
171 | const int ridx = vget_lane_u16(vidx, 0); |
172 | const int gidx = vget_lane_u16(vidx, 1); |
173 | const int bidx = vget_lane_u16(vidx, 2); |
174 | v = vset_lane_u16(m_fromLinear[ridx], v, 2); |
175 | v = vset_lane_u16(m_fromLinear[gidx], v, 1); |
176 | v = vset_lane_u16(m_fromLinear[bidx], v, 0); |
177 | uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8); |
178 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
179 | #else |
180 | uint a = rgb64.alpha(); |
181 | uint r = rgb64.red(); |
182 | uint g = rgb64.green(); |
183 | uint b = rgb64.blue(); |
184 | a = a - (a >> 8); |
185 | r = r - (r >> 8); |
186 | g = g - (g >> 8); |
187 | b = b - (b >> 8); |
188 | a = (a + 0x80) >> 8; |
189 | r = (m_fromLinear[r >> 4] + 0x80) >> 8; |
190 | g = (m_fromLinear[g >> 4] + 0x80) >> 8; |
191 | b = (m_fromLinear[b >> 4] + 0x80) >> 8; |
192 | return (a << 24) | (r << 16) | (g << 8) | b; |
193 | #endif |
194 | } |
195 | |
196 | QRgb fromLinear(QRgb rgb32) const |
197 | { |
198 | return convertWithTable(rgb32, table: m_fromLinear); |
199 | } |
200 | |
201 | QRgba64 fromLinear(QRgba64 rgb64) const |
202 | { |
203 | return convertWithTable(rgb64, table: m_fromLinear); |
204 | } |
205 | |
206 | int u8FromLinearF32(float f) const |
207 | { |
208 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
209 | return (v + 0x80) >> 8; |
210 | } |
211 | int u16FromLinearF32(float f) const |
212 | { |
213 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
214 | return v + (v >> 8); |
215 | } |
216 | float fromLinear(float f) const |
217 | { |
218 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
219 | return v * (1.0f / (255*256)); |
220 | } |
221 | |
222 | // We translate to 0-65280 (255*256) instead to 0-65535 to make simple |
223 | // shifting an accurate conversion. |
224 | // We translate from 0-4080 (255*16) for the same speed up, and to keep |
225 | // the tables small enough to fit in most inner caches. |
226 | ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
227 | ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
228 | |
229 | private: |
230 | QColorTrcLut() { } |
231 | |
232 | Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table) |
233 | { |
234 | const int r = (table[qRed(rgb: rgb32) << 4] + 0x80) >> 8; |
235 | const int g = (table[qGreen(rgb: rgb32) << 4] + 0x80) >> 8; |
236 | const int b = (table[qBlue(rgb: rgb32) << 4] + 0x80) >> 8; |
237 | return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b; |
238 | } |
239 | Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table) |
240 | { |
241 | #if defined(__SSE2__) |
242 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
243 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
244 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
245 | const int ridx = _mm_extract_epi16(vidx, 2); |
246 | const int gidx = _mm_extract_epi16(vidx, 1); |
247 | const int bidx = _mm_extract_epi16(vidx, 0); |
248 | v = _mm_insert_epi16(v, table[ridx], 2); |
249 | v = _mm_insert_epi16(v, table[gidx], 1); |
250 | v = _mm_insert_epi16(v, table[bidx], 0); |
251 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
252 | QRgba64 rgba64; |
253 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
254 | return rgba64; |
255 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
256 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
257 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
258 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
259 | const int ridx = vget_lane_u16(vidx, 2); |
260 | const int gidx = vget_lane_u16(vidx, 1); |
261 | const int bidx = vget_lane_u16(vidx, 0); |
262 | v = vset_lane_u16(table[ridx], v, 2); |
263 | v = vset_lane_u16(table[gidx], v, 1); |
264 | v = vset_lane_u16(table[bidx], v, 0); |
265 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
266 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0)); |
267 | #else |
268 | ushort r = rgb64.red(); |
269 | ushort g = rgb64.green(); |
270 | ushort b = rgb64.blue(); |
271 | r = r - (r >> 8); |
272 | g = g - (g >> 8); |
273 | b = b - (b >> 8); |
274 | r = table[r >> 4]; |
275 | g = table[g >> 4]; |
276 | b = table[b >> 4]; |
277 | r = r + (r >> 8); |
278 | g = g + (g >> 8); |
279 | b = b + (b >> 8); |
280 | return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); |
281 | #endif |
282 | } |
283 | }; |
284 | |
285 | QT_END_NAMESPACE |
286 | |
287 | #endif // QCOLORTRCLUT_P_H |
288 | |