| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2018 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtGui module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | |
| 41 | #include "qcolortransform.h" |
| 42 | #include "qcolortransform_p.h" |
| 43 | |
| 44 | #include "qcolormatrix_p.h" |
| 45 | #include "qcolorspace_p.h" |
| 46 | #include "qcolortrc_p.h" |
| 47 | #include "qcolortrclut_p.h" |
| 48 | |
| 49 | #include <QtCore/qatomic.h> |
| 50 | #include <QtCore/qmath.h> |
| 51 | #include <QtGui/qcolor.h> |
| 52 | #include <QtGui/qtransform.h> |
| 53 | #include <QtCore/private/qsimd_p.h> |
| 54 | |
| 55 | #include <qdebug.h> |
| 56 | |
| 57 | QT_BEGIN_NAMESPACE |
| 58 | |
| 59 | QColorTrcLut *lutFromTrc(const QColorTrc &trc) |
| 60 | { |
| 61 | if (trc.m_type == QColorTrc::Type::Table) |
| 62 | return QColorTrcLut::fromTransferTable(transTable: trc.m_table); |
| 63 | if (trc.m_type == QColorTrc::Type::Function) |
| 64 | return QColorTrcLut::fromTransferFunction(transfn: trc.m_fun); |
| 65 | qWarning() << "TRC uninitialized" ; |
| 66 | return nullptr; |
| 67 | } |
| 68 | |
| 69 | void QColorTransformPrivate::updateLutsIn() const |
| 70 | { |
| 71 | if (colorSpaceIn->lut.generated.loadAcquire()) |
| 72 | return; |
| 73 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
| 74 | if (colorSpaceIn->lut.generated.loadRelaxed()) |
| 75 | return; |
| 76 | |
| 77 | for (int i = 0; i < 3; ++i) { |
| 78 | if (!colorSpaceIn->trc[i].isValid()) |
| 79 | return; |
| 80 | } |
| 81 | |
| 82 | if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) { |
| 83 | colorSpaceIn->lut[0].reset(t: lutFromTrc(trc: colorSpaceIn->trc[0])); |
| 84 | colorSpaceIn->lut[1] = colorSpaceIn->lut[0]; |
| 85 | colorSpaceIn->lut[2] = colorSpaceIn->lut[0]; |
| 86 | } else { |
| 87 | for (int i = 0; i < 3; ++i) |
| 88 | colorSpaceIn->lut[i].reset(t: lutFromTrc(trc: colorSpaceIn->trc[i])); |
| 89 | } |
| 90 | |
| 91 | colorSpaceIn->lut.generated.storeRelease(newValue: 1); |
| 92 | } |
| 93 | |
| 94 | void QColorTransformPrivate::updateLutsOut() const |
| 95 | { |
| 96 | if (colorSpaceOut->lut.generated.loadAcquire()) |
| 97 | return; |
| 98 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
| 99 | if (colorSpaceOut->lut.generated.loadRelaxed()) |
| 100 | return; |
| 101 | for (int i = 0; i < 3; ++i) { |
| 102 | if (!colorSpaceOut->trc[i].isValid()) |
| 103 | return; |
| 104 | } |
| 105 | |
| 106 | if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) { |
| 107 | colorSpaceOut->lut[0].reset(t: lutFromTrc(trc: colorSpaceOut->trc[0])); |
| 108 | colorSpaceOut->lut[1] = colorSpaceOut->lut[0]; |
| 109 | colorSpaceOut->lut[2] = colorSpaceOut->lut[0]; |
| 110 | } else { |
| 111 | for (int i = 0; i < 3; ++i) |
| 112 | colorSpaceOut->lut[i].reset(t: lutFromTrc(trc: colorSpaceOut->trc[i])); |
| 113 | } |
| 114 | |
| 115 | colorSpaceOut->lut.generated.storeRelease(newValue: 1); |
| 116 | } |
| 117 | |
/*!
    \class QColorTransform
    \brief The QColorTransform class is a transformation between color spaces.
    \since 5.14

    \ingroup painting
    \ingroup appearance
    \inmodule QtGui

    QColorTransform is an instantiation of a transformation between color spaces.
    It can be applied to colors and pixels to convert them from one color space to
    another.

    Setting up a QColorTransform takes some preprocessing, so keeping around
    QColorTransforms that you need often is recommended, instead of generating
    them on the fly.
*/
| 135 | |
| 136 | |
| 137 | QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept |
| 138 | : d(colorTransform.d) |
| 139 | { |
| 140 | if (d) |
| 141 | d->ref.ref(); |
| 142 | } |
| 143 | |
| 144 | |
| 145 | QColorTransform::~QColorTransform() |
| 146 | { |
| 147 | if (d && !d->ref.deref()) |
| 148 | delete d; |
| 149 | } |
| 150 | |
| 151 | /*! |
| 152 | Applies the color transformation on the QRgb value \a argb. |
| 153 | |
| 154 | The input should be opaque or unpremultiplied. |
| 155 | */ |
| 156 | QRgb QColorTransform::map(QRgb argb) const |
| 157 | { |
| 158 | if (!d) |
| 159 | return argb; |
| 160 | constexpr float f = 1.0f / 255.0f; |
| 161 | QColorVector c = { qRed(rgb: argb) * f, qGreen(rgb: argb) * f, qBlue(rgb: argb) * f }; |
| 162 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
| 163 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
| 164 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
| 165 | c = d->colorMatrix.map(c); |
| 166 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
| 167 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
| 168 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
| 169 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
| 170 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
| 171 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
| 172 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
| 173 | } else { |
| 174 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
| 175 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
| 176 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
| 177 | } |
| 178 | |
| 179 | return qRgba(r: c.x * 255 + 0.5f, g: c.y * 255 + 0.5f, b: c.z * 255 + 0.5f, a: qAlpha(rgb: argb)); |
| 180 | } |
| 181 | |
| 182 | /*! |
| 183 | Applies the color transformation on the QRgba64 value \a rgba64. |
| 184 | |
| 185 | The input should be opaque or unpremultiplied. |
| 186 | */ |
| 187 | QRgba64 QColorTransform::map(QRgba64 rgba64) const |
| 188 | { |
| 189 | if (!d) |
| 190 | return rgba64; |
| 191 | constexpr float f = 1.0f / 65535.0f; |
| 192 | QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f }; |
| 193 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
| 194 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
| 195 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
| 196 | c = d->colorMatrix.map(c); |
| 197 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
| 198 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
| 199 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
| 200 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
| 201 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
| 202 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
| 203 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
| 204 | } else { |
| 205 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
| 206 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
| 207 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
| 208 | } |
| 209 | |
| 210 | return QRgba64::fromRgba64(red: c.x * 65535, green: c.y * 65535, blue: c.z * 65535, alpha: rgba64.alpha()); |
| 211 | } |
| 212 | |
| 213 | /*! |
| 214 | Applies the color transformation on the QColor value \a color. |
| 215 | |
| 216 | */ |
| 217 | QColor QColorTransform::map(const QColor &color) const |
| 218 | { |
| 219 | if (!d) |
| 220 | return color; |
| 221 | QColor clr = color; |
| 222 | if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb) |
| 223 | clr = clr.toRgb(); |
| 224 | |
| 225 | QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() }; |
| 226 | if (clr.spec() == QColor::ExtendedRgb) { |
| 227 | c.x = d->colorSpaceIn->trc[0].applyExtended(x: c.x); |
| 228 | c.y = d->colorSpaceIn->trc[1].applyExtended(x: c.y); |
| 229 | c.z = d->colorSpaceIn->trc[2].applyExtended(x: c.z); |
| 230 | } else { |
| 231 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
| 232 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
| 233 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
| 234 | } |
| 235 | c = d->colorMatrix.map(c); |
| 236 | bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f; |
| 237 | if (inGamut) { |
| 238 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
| 239 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
| 240 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
| 241 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
| 242 | } else { |
| 243 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
| 244 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
| 245 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
| 246 | } |
| 247 | } else { |
| 248 | c.x = d->colorSpaceOut->trc[0].applyInverseExtended(x: c.x); |
| 249 | c.y = d->colorSpaceOut->trc[1].applyInverseExtended(x: c.y); |
| 250 | c.z = d->colorSpaceOut->trc[2].applyInverseExtended(x: c.z); |
| 251 | } |
| 252 | QColor out; |
| 253 | out.setRgbF(r: c.x, g: c.y, b: c.z, a: color.alphaF()); |
| 254 | return out; |
| 255 | } |
| 256 | |
| 257 | // Optimized sub-routines for fast block based conversion: |
| 258 | |
| 259 | static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix) |
| 260 | { |
| 261 | #if defined(__SSE2__) |
| 262 | const __m128 minV = _mm_set1_ps(w: 0.0f); |
| 263 | const __m128 maxV = _mm_set1_ps(w: 1.0f); |
| 264 | const __m128 xMat = _mm_loadu_ps(p: &colorMatrix.r.x); |
| 265 | const __m128 yMat = _mm_loadu_ps(p: &colorMatrix.g.x); |
| 266 | const __m128 zMat = _mm_loadu_ps(p: &colorMatrix.b.x); |
| 267 | for (qsizetype j = 0; j < len; ++j) { |
| 268 | __m128 c = _mm_loadu_ps(p: &buffer[j].x); |
| 269 | __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)); |
| 270 | __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)); |
| 271 | __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)); |
| 272 | cx = _mm_mul_ps(a: cx, b: xMat); |
| 273 | cy = _mm_mul_ps(a: cy, b: yMat); |
| 274 | cz = _mm_mul_ps(a: cz, b: zMat); |
| 275 | cx = _mm_add_ps(a: cx, b: cy); |
| 276 | cx = _mm_add_ps(a: cx, b: cz); |
| 277 | // Clamp: |
| 278 | cx = _mm_min_ps(a: cx, b: maxV); |
| 279 | cx = _mm_max_ps(a: cx, b: minV); |
| 280 | _mm_storeu_ps(p: &buffer[j].x, a: cx); |
| 281 | } |
| 282 | #else |
| 283 | for (int j = 0; j < len; ++j) { |
| 284 | const QColorVector cv = colorMatrix.map(buffer[j]); |
| 285 | buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x)); |
| 286 | buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y)); |
| 287 | buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z)); |
| 288 | } |
| 289 | #endif |
| 290 | } |
| 291 | |
| 292 | template<typename T> |
| 293 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
| 294 | template<typename T> |
| 295 | static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
| 296 | |
| 297 | #if defined(__SSE2__) |
| 298 | // Load to [0-alpha] in 4x32 SIMD |
| 299 | template<typename T> |
| 300 | static inline void loadP(const T &p, __m128i &v); |
| 301 | |
| 302 | template<> |
| 303 | inline void loadP<QRgb>(const QRgb &p, __m128i &v) |
| 304 | { |
| 305 | v = _mm_cvtsi32_si128(a: p); |
| 306 | #if defined(__SSE4_1__) |
| 307 | v = _mm_cvtepu8_epi32(v); |
| 308 | #else |
| 309 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
| 310 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
| 311 | #endif |
| 312 | } |
| 313 | |
| 314 | template<> |
| 315 | inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v) |
| 316 | { |
| 317 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
| 318 | #if defined(__SSE4_1__) |
| 319 | v = _mm_cvtepu16_epi32(v); |
| 320 | #else |
| 321 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
| 322 | #endif |
| 323 | // Shuffle to ARGB as the template below expects it |
| 324 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
| 325 | } |
| 326 | |
| 327 | template<typename T> |
| 328 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 329 | { |
| 330 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
| 331 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
| 332 | for (qsizetype i = 0; i < len; ++i) { |
| 333 | __m128i v; |
| 334 | loadP<T>(src[i], v); |
| 335 | __m128 vf = _mm_cvtepi32_ps(a: v); |
| 336 | // Approximate 1/a: |
| 337 | __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3)); |
| 338 | __m128 via = _mm_rcp_ps(a: va); |
| 339 | via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va))); |
| 340 | // v * (1/a) |
| 341 | vf = _mm_mul_ps(a: vf, b: via); |
| 342 | |
| 343 | // Handle zero alpha |
| 344 | __m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: _mm_set1_ps(w: 0.0f)); |
| 345 | vf = _mm_andnot_ps(a: vAlphaMask, b: vf); |
| 346 | |
| 347 | // LUT |
| 348 | v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
| 349 | const int ridx = _mm_extract_epi16(v, 4); |
| 350 | const int gidx = _mm_extract_epi16(v, 2); |
| 351 | const int bidx = _mm_extract_epi16(v, 0); |
| 352 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
| 353 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
| 354 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
| 355 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
| 356 | |
| 357 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | // Load to [0-4080] in 4x32 SIMD |
| 362 | template<typename T> |
| 363 | static inline void loadPU(const T &p, __m128i &v); |
| 364 | |
| 365 | template<> |
| 366 | inline void loadPU<QRgb>(const QRgb &p, __m128i &v) |
| 367 | { |
| 368 | v = _mm_cvtsi32_si128(a: p); |
| 369 | #if defined(__SSE4_1__) |
| 370 | v = _mm_cvtepu8_epi32(v); |
| 371 | #else |
| 372 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
| 373 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
| 374 | #endif |
| 375 | v = _mm_slli_epi32(a: v, count: 4); |
| 376 | } |
| 377 | |
| 378 | template<> |
| 379 | inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v) |
| 380 | { |
| 381 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
| 382 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 383 | #if defined(__SSE4_1__) |
| 384 | v = _mm_cvtepu16_epi32(v); |
| 385 | #else |
| 386 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
| 387 | #endif |
| 388 | v = _mm_srli_epi32(a: v, count: 4); |
| 389 | // Shuffle to ARGB as the template below expects it |
| 390 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
| 391 | } |
| 392 | |
| 393 | template<typename T> |
| 394 | void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 395 | { |
| 396 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
| 397 | for (qsizetype i = 0; i < len; ++i) { |
| 398 | __m128i v; |
| 399 | loadPU<T>(src[i], v); |
| 400 | const int ridx = _mm_extract_epi16(v, 4); |
| 401 | const int gidx = _mm_extract_epi16(v, 2); |
| 402 | const int bidx = _mm_extract_epi16(v, 0); |
| 403 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
| 404 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
| 405 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
| 406 | __m128 vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
| 407 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | #else |
| 412 | template<> |
| 413 | void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 414 | { |
| 415 | for (qsizetype i = 0; i < len; ++i) { |
| 416 | const uint p = src[i]; |
| 417 | const int a = qAlpha(p); |
| 418 | if (a) { |
| 419 | const float ia = 4080.0f / a; |
| 420 | const int ridx = int(qRed(p) * ia + 0.5f); |
| 421 | const int gidx = int(qGreen(p) * ia + 0.5f); |
| 422 | const int bidx = int(qBlue(p) * ia + 0.5f); |
| 423 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
| 424 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
| 425 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
| 426 | } else { |
| 427 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
| 428 | } |
| 429 | } |
| 430 | } |
| 431 | |
| 432 | template<> |
| 433 | void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 434 | { |
| 435 | for (qsizetype i = 0; i < len; ++i) { |
| 436 | const QRgba64 &p = src[i]; |
| 437 | const int a = p.alpha(); |
| 438 | if (a) { |
| 439 | const float ia = 4080.0f / a; |
| 440 | const int ridx = int(p.red() * ia + 0.5f); |
| 441 | const int gidx = int(p.green() * ia + 0.5f); |
| 442 | const int bidx = int(p.blue() * ia + 0.5f); |
| 443 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
| 444 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
| 445 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
| 446 | } else { |
| 447 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
| 448 | } |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | template<> |
| 453 | void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 454 | { |
| 455 | for (qsizetype i = 0; i < len; ++i) { |
| 456 | const uint p = src[i]; |
| 457 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(qRed(p)); |
| 458 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(qGreen(p)); |
| 459 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(qBlue(p)); |
| 460 | } |
| 461 | } |
| 462 | |
| 463 | template<> |
| 464 | void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
| 465 | { |
| 466 | for (qsizetype i = 0; i < len; ++i) { |
| 467 | const QRgba64 &p = src[i]; |
| 468 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(p.red()); |
| 469 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(p.green()); |
| 470 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(p.blue()); |
| 471 | } |
| 472 | } |
| 473 | #endif |
| 474 | |
| 475 | static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
| 476 | const QColorTransformPrivate *d_ptr) |
| 477 | { |
| 478 | #if defined(__SSE2__) |
| 479 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
| 480 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
| 481 | for (qsizetype i = 0; i < len; ++i) { |
| 482 | const int a = qAlpha(rgb: src[i]); |
| 483 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
| 484 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
| 485 | __m128 va = _mm_set1_ps(w: a); |
| 486 | va = _mm_mul_ps(a: va, b: iFF00); |
| 487 | const int ridx = _mm_extract_epi16(v, 0); |
| 488 | const int gidx = _mm_extract_epi16(v, 2); |
| 489 | const int bidx = _mm_extract_epi16(v, 4); |
| 490 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 4); |
| 491 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
| 492 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
| 493 | vf = _mm_cvtepi32_ps(a: v); |
| 494 | vf = _mm_mul_ps(a: vf, b: va); |
| 495 | v = _mm_cvtps_epi32(a: vf); |
| 496 | v = _mm_packs_epi32(a: v, b: v); |
| 497 | v = _mm_insert_epi16(v, a, 3); |
| 498 | v = _mm_packus_epi16(a: v, b: v); |
| 499 | dst[i] = _mm_cvtsi128_si32(a: v); |
| 500 | } |
| 501 | #else |
| 502 | for (qsizetype i = 0; i < len; ++i) { |
| 503 | const int a = qAlpha(src[i]); |
| 504 | const float fa = a / (255.0f * 256.0f); |
| 505 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
| 506 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
| 507 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
| 508 | dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
| 509 | } |
| 510 | #endif |
| 511 | } |
| 512 | |
| 513 | static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
| 514 | const QColorTransformPrivate *d_ptr) |
| 515 | { |
| 516 | #if defined(__SSE2__) |
| 517 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
| 518 | for (qsizetype i = 0; i < len; ++i) { |
| 519 | const int a = qAlpha(rgb: src[i]); |
| 520 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
| 521 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
| 522 | const int ridx = _mm_extract_epi16(v, 0); |
| 523 | const int gidx = _mm_extract_epi16(v, 2); |
| 524 | const int bidx = _mm_extract_epi16(v, 4); |
| 525 | v = _mm_setzero_si128(); |
| 526 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
| 527 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
| 528 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
| 529 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
| 530 | v = _mm_srli_epi16(a: v, count: 8); |
| 531 | v = _mm_insert_epi16(v, a, 3); |
| 532 | v = _mm_packus_epi16(a: v, b: v); |
| 533 | dst[i] = _mm_cvtsi128_si32(a: v); |
| 534 | } |
| 535 | #else |
| 536 | for (qsizetype i = 0; i < len; ++i) { |
| 537 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
| 538 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
| 539 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
| 540 | dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); |
| 541 | } |
| 542 | #endif |
| 543 | } |
| 544 | |
| 545 | static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
| 546 | const QColorTransformPrivate *d_ptr) |
| 547 | { |
| 548 | Q_UNUSED(src); |
| 549 | #if defined(__SSE2__) |
| 550 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
| 551 | for (qsizetype i = 0; i < len; ++i) { |
| 552 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
| 553 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
| 554 | const int ridx = _mm_extract_epi16(v, 0); |
| 555 | const int gidx = _mm_extract_epi16(v, 2); |
| 556 | const int bidx = _mm_extract_epi16(v, 4); |
| 557 | v = _mm_setzero_si128(); |
| 558 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
| 559 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
| 560 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
| 561 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
| 562 | v = _mm_srli_epi16(a: v, count: 8); |
| 563 | v = _mm_insert_epi16(v, 255, 3); |
| 564 | v = _mm_packus_epi16(a: v, b: v); |
| 565 | dst[i] = _mm_cvtsi128_si32(a: v); |
| 566 | } |
| 567 | #else |
| 568 | for (qsizetype i = 0; i < len; ++i) { |
| 569 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
| 570 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
| 571 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
| 572 | dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); |
| 573 | } |
| 574 | #endif |
| 575 | } |
| 576 | |
| 577 | static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
| 578 | const QColorTransformPrivate *d_ptr) |
| 579 | { |
| 580 | for (qsizetype i = 0; i < len; ++i) { |
| 581 | const int a = src[i].alpha(); |
| 582 | const float fa = a / (255.0f * 256.0f); |
| 583 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
| 584 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
| 585 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
| 586 | dst[i] = qRgba64(r: r * fa + 0.5f, g: g * fa + 0.5f, b: b * fa + 0.5f, a); |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
| 591 | const QColorTransformPrivate *d_ptr) |
| 592 | { |
| 593 | for (qsizetype i = 0; i < len; ++i) { |
| 594 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(f: buffer[i].x); |
| 595 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(f: buffer[i].y); |
| 596 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(f: buffer[i].z); |
| 597 | dst[i] = qRgba64(r, g, b, a: src[i].alpha()); |
| 598 | } |
| 599 | } |
| 600 | |
| 601 | static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
| 602 | const QColorTransformPrivate *d_ptr) |
| 603 | { |
| 604 | Q_UNUSED(src); |
| 605 | for (qsizetype i = 0; i < len; ++i) { |
| 606 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(f: buffer[i].x); |
| 607 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(f: buffer[i].y); |
| 608 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(f: buffer[i].z); |
| 609 | dst[i] = qRgba64(r, g, b, a: 0xFFFF); |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | static constexpr qsizetype WorkBlockSize = 256; |
| 614 | |
// Raw storage for Count objects of type T, aligned for T, that does not run
// T's constructors. The object converts implicitly to a T* pointing at the
// start of that storage.
template <typename T, int Count = 1>
class QUninitialized
{
    // Sole data member, so the storage starts at the object's own address.
    alignas(T) char data[sizeof(T) * Count];

public:
    operator T*()
    {
        T *const storage = reinterpret_cast<T *>(this);
        return storage;
    }
};
| 623 | |
| 624 | template<typename T> |
| 625 | void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const |
| 626 | { |
| 627 | if (!colorMatrix.isValid()) |
| 628 | return; |
| 629 | |
| 630 | updateLutsIn(); |
| 631 | updateLutsOut(); |
| 632 | |
| 633 | bool doApplyMatrix = (colorMatrix != QColorMatrix::identity()); |
| 634 | |
| 635 | QUninitialized<QColorVector, WorkBlockSize> buffer; |
| 636 | |
| 637 | qsizetype i = 0; |
| 638 | while (i < count) { |
| 639 | const qsizetype len = qMin(a: count - i, b: WorkBlockSize); |
| 640 | if (flags & InputPremultiplied) |
| 641 | loadPremultiplied(buffer, src + i, len, this); |
| 642 | else |
| 643 | loadUnpremultiplied(buffer, src + i, len, this); |
| 644 | |
| 645 | if (doApplyMatrix) |
| 646 | applyMatrix(buffer, len, colorMatrix); |
| 647 | |
| 648 | if (flags & InputOpaque) |
| 649 | storeOpaque(dst + i, src + i, buffer, len, this); |
| 650 | else if (flags & OutputPremultiplied) |
| 651 | storePremultiplied(dst + i, src + i, buffer, len, this); |
| 652 | else |
| 653 | storeUnpremultiplied(dst + i, src + i, buffer, len, this); |
| 654 | |
| 655 | i += len; |
| 656 | } |
| 657 | } |
| 658 | |
/*!
    \internal
    \enum QColorTransformPrivate::TransformFlag

    Defines how the transform is to be applied.

    \value Unpremultiplied The input and output should both be unpremultiplied.
    \value InputOpaque The input is guaranteed to be opaque.
    \value InputPremultiplied The input is premultiplied.
    \value OutputPremultiplied The output should be premultiplied.
    \value Premultiplied Both input and output should be premultiplied.
*/
| 671 | |
| 672 | /*! |
| 673 | \internal |
| 674 | Prepares a color transformation for fast application. You do not need to |
| 675 | call this explicitly as it will be called implicitly on the first transforms, but |
| 676 | if you want predictable performance on the first transforms, you can perform it |
| 677 | in advance. |
| 678 | |
| 679 | \sa QColorTransform::map(), apply() |
| 680 | */ |
| 681 | void QColorTransformPrivate::prepare() |
| 682 | { |
| 683 | updateLutsIn(); |
| 684 | updateLutsOut(); |
| 685 | } |
| 686 | |
| 687 | /*! |
| 688 | \internal |
| 689 | Applies the color transformation on \a count QRgb pixels starting from |
| 690 | \a src and stores the result in \a dst. |
| 691 | |
| 692 | Thread-safe if prepare() has been called first. |
| 693 | |
| 694 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
| 695 | |
| 696 | \sa prepare() |
| 697 | */ |
| 698 | void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const |
| 699 | { |
| 700 | apply<QRgb>(dst, src, count, flags); |
| 701 | } |
| 702 | |
| 703 | /*! |
| 704 | \internal |
| 705 | Applies the color transformation on \a count QRgba64 pixels starting from |
| 706 | \a src and stores the result in \a dst. |
| 707 | |
| 708 | Thread-safe if prepare() has been called first. |
| 709 | |
| 710 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
| 711 | |
| 712 | \sa prepare() |
| 713 | */ |
| 714 | void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const |
| 715 | { |
| 716 | apply<QRgba64>(dst, src, count, flags); |
| 717 | } |
| 718 | |
| 719 | |
| 720 | QT_END_NAMESPACE |
| 721 | |