1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #ifndef QCOLORTRCLUT_P_H |
5 | #define QCOLORTRCLUT_P_H |
6 | |
7 | // |
8 | // W A R N I N G |
9 | // ------------- |
10 | // |
11 | // This file is not part of the Qt API. It exists purely as an |
12 | // implementation detail. This header file may change from version to |
13 | // version without notice, or even be removed. |
14 | // |
15 | // We mean it. |
16 | // |
17 | |
18 | #include <QtGui/private/qtguiglobal_p.h> |
19 | #include <QtGui/qrgb.h> |
20 | #include <QtGui/qrgba64.h> |
21 | |
22 | #include <cmath> |
23 | #include <memory> |
24 | |
25 | #if defined(__SSE2__) |
26 | #include <emmintrin.h> |
27 | #elif defined(__ARM_NEON__) || defined(__ARM_NEON) |
28 | #include <arm_neon.h> |
29 | #endif |
30 | |
31 | QT_BEGIN_NAMESPACE |
32 | |
33 | class QColorTransferFunction; |
34 | class QColorTransferTable; |
35 | |
36 | class Q_GUI_EXPORT QColorTrcLut |
37 | { |
38 | public: |
39 | static std::shared_ptr<QColorTrcLut> fromGamma(qreal gamma); |
40 | static std::shared_ptr<QColorTrcLut> fromTransferFunction(const QColorTransferFunction &transfn); |
41 | static std::shared_ptr<QColorTrcLut> fromTransferTable(const QColorTransferTable &transTable); |
42 | |
43 | // The following methods all convert opaque or unpremultiplied colors: |
44 | |
45 | QRgba64 toLinear64(QRgb rgb32) const |
46 | { |
47 | #if defined(__SSE2__) |
48 | __m128i v = _mm_cvtsi32_si128(a: rgb32); |
49 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
50 | const __m128i vidx = _mm_slli_epi16(a: v, count: 4); |
51 | const int ridx = _mm_extract_epi16(vidx, 2); |
52 | const int gidx = _mm_extract_epi16(vidx, 1); |
53 | const int bidx = _mm_extract_epi16(vidx, 0); |
54 | v = _mm_slli_epi16(a: v, count: 8); // a * 256 |
55 | v = _mm_insert_epi16(v, m_toLinear[ridx], 0); |
56 | v = _mm_insert_epi16(v, m_toLinear[gidx], 1); |
57 | v = _mm_insert_epi16(v, m_toLinear[bidx], 2); |
58 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
59 | QRgba64 rgba64; |
60 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
61 | return rgba64; |
62 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
63 | uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32)); |
64 | uint16x4_t v16 = vget_low_u16(vmovl_u8(v8)); |
65 | const uint16x4_t vidx = vshl_n_u16(v16, 4); |
66 | const int ridx = vget_lane_u16(vidx, 2); |
67 | const int gidx = vget_lane_u16(vidx, 1); |
68 | const int bidx = vget_lane_u16(vidx, 0); |
69 | v16 = vshl_n_u16(v16, 8); // a * 256 |
70 | v16 = vset_lane_u16(m_toLinear[ridx], v16, 0); |
71 | v16 = vset_lane_u16(m_toLinear[gidx], v16, 1); |
72 | v16 = vset_lane_u16(m_toLinear[bidx], v16, 2); |
73 | v16 = vadd_u16(v16, vshr_n_u16(v16, 8)); |
74 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0)); |
75 | #else |
76 | uint r = m_toLinear[qRed(rgb32) << 4]; |
77 | uint g = m_toLinear[qGreen(rgb32) << 4]; |
78 | uint b = m_toLinear[qBlue(rgb32) << 4]; |
79 | r = r + (r >> 8); |
80 | g = g + (g >> 8); |
81 | b = b + (b >> 8); |
82 | return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); |
83 | #endif |
84 | } |
85 | QRgba64 toLinear64(QRgba64) const = delete; |
86 | |
87 | QRgb toLinear(QRgb rgb32) const |
88 | { |
89 | return convertWithTable(rgb32, table: m_toLinear); |
90 | } |
91 | |
92 | QRgba64 toLinear(QRgba64 rgb64) const |
93 | { |
94 | return convertWithTable(rgb64, table: m_toLinear); |
95 | } |
96 | |
97 | float u8ToLinearF32(int c) const |
98 | { |
99 | ushort v = m_toLinear[c << 4]; |
100 | return v * (1.0f / (255*256)); |
101 | } |
102 | |
103 | float u16ToLinearF32(int c) const |
104 | { |
105 | c -= (c >> 8); |
106 | ushort v = m_toLinear[c >> 4]; |
107 | return v * (1.0f / (255*256)); |
108 | } |
109 | |
110 | float toLinear(float f) const |
111 | { |
112 | ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)]; |
113 | return v * (1.0f / (255*256)); |
114 | } |
115 | |
116 | QRgb fromLinear64(QRgba64 rgb64) const |
117 | { |
118 | #if defined(__SSE2__) |
119 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
120 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
121 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
122 | const int ridx = _mm_extract_epi16(vidx, 0); |
123 | const int gidx = _mm_extract_epi16(vidx, 1); |
124 | const int bidx = _mm_extract_epi16(vidx, 2); |
125 | v = _mm_insert_epi16(v, m_fromLinear[ridx], 2); |
126 | v = _mm_insert_epi16(v, m_fromLinear[gidx], 1); |
127 | v = _mm_insert_epi16(v, m_fromLinear[bidx], 0); |
128 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
129 | v = _mm_srli_epi16(a: v, count: 8); |
130 | v = _mm_packus_epi16(a: v, b: v); |
131 | return _mm_cvtsi128_si32(a: v); |
132 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
133 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
134 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
135 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
136 | const int ridx = vget_lane_u16(vidx, 0); |
137 | const int gidx = vget_lane_u16(vidx, 1); |
138 | const int bidx = vget_lane_u16(vidx, 2); |
139 | v = vset_lane_u16(m_fromLinear[ridx], v, 2); |
140 | v = vset_lane_u16(m_fromLinear[gidx], v, 1); |
141 | v = vset_lane_u16(m_fromLinear[bidx], v, 0); |
142 | uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8); |
143 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
144 | #else |
145 | uint a = rgb64.alpha(); |
146 | uint r = rgb64.red(); |
147 | uint g = rgb64.green(); |
148 | uint b = rgb64.blue(); |
149 | a = a - (a >> 8); |
150 | r = r - (r >> 8); |
151 | g = g - (g >> 8); |
152 | b = b - (b >> 8); |
153 | a = (a + 0x80) >> 8; |
154 | r = (m_fromLinear[r >> 4] + 0x80) >> 8; |
155 | g = (m_fromLinear[g >> 4] + 0x80) >> 8; |
156 | b = (m_fromLinear[b >> 4] + 0x80) >> 8; |
157 | return (a << 24) | (r << 16) | (g << 8) | b; |
158 | #endif |
159 | } |
160 | |
161 | QRgb fromLinear(QRgb rgb32) const |
162 | { |
163 | return convertWithTable(rgb32, table: m_fromLinear); |
164 | } |
165 | |
166 | QRgba64 fromLinear(QRgba64 rgb64) const |
167 | { |
168 | return convertWithTable(rgb64, table: m_fromLinear); |
169 | } |
170 | |
171 | int u8FromLinearF32(float f) const |
172 | { |
173 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
174 | return (v + 0x80) >> 8; |
175 | } |
176 | int u16FromLinearF32(float f) const |
177 | { |
178 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
179 | return v + (v >> 8); |
180 | } |
181 | float fromLinear(float f) const |
182 | { |
183 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
184 | return v * (1.0f / (255*256)); |
185 | } |
186 | |
187 | // We translate to 0-65280 (255*256) instead to 0-65535 to make simple |
188 | // shifting an accurate conversion. |
189 | // We translate from 0-4080 (255*16) for the same speed up, and to keep |
190 | // the tables small enough to fit in most inner caches. |
191 | ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
192 | ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
193 | |
194 | private: |
195 | QColorTrcLut() { } // force uninitialized members |
196 | |
197 | static std::shared_ptr<QColorTrcLut> create(); |
198 | |
199 | Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table) |
200 | { |
201 | const int r = (table[qRed(rgb: rgb32) << 4] + 0x80) >> 8; |
202 | const int g = (table[qGreen(rgb: rgb32) << 4] + 0x80) >> 8; |
203 | const int b = (table[qBlue(rgb: rgb32) << 4] + 0x80) >> 8; |
204 | return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b; |
205 | } |
206 | Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table) |
207 | { |
208 | #if defined(__SSE2__) |
209 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
210 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
211 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
212 | const int ridx = _mm_extract_epi16(vidx, 2); |
213 | const int gidx = _mm_extract_epi16(vidx, 1); |
214 | const int bidx = _mm_extract_epi16(vidx, 0); |
215 | v = _mm_insert_epi16(v, table[ridx], 2); |
216 | v = _mm_insert_epi16(v, table[gidx], 1); |
217 | v = _mm_insert_epi16(v, table[bidx], 0); |
218 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
219 | QRgba64 rgba64; |
220 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
221 | return rgba64; |
222 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
223 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
224 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
225 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
226 | const int ridx = vget_lane_u16(vidx, 2); |
227 | const int gidx = vget_lane_u16(vidx, 1); |
228 | const int bidx = vget_lane_u16(vidx, 0); |
229 | v = vset_lane_u16(table[ridx], v, 2); |
230 | v = vset_lane_u16(table[gidx], v, 1); |
231 | v = vset_lane_u16(table[bidx], v, 0); |
232 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
233 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0)); |
234 | #else |
235 | ushort r = rgb64.red(); |
236 | ushort g = rgb64.green(); |
237 | ushort b = rgb64.blue(); |
238 | r = r - (r >> 8); |
239 | g = g - (g >> 8); |
240 | b = b - (b >> 8); |
241 | r = table[r >> 4]; |
242 | g = table[g >> 4]; |
243 | b = table[b >> 4]; |
244 | r = r + (r >> 8); |
245 | g = g + (g >> 8); |
246 | b = b + (b >> 8); |
247 | return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); |
248 | #endif |
249 | } |
250 | }; |
251 | |
252 | QT_END_NAMESPACE |
253 | |
254 | #endif // QCOLORTRCLUT_P_H |
255 | |