| 1 | // Copyright (C) 2024 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #ifndef QCOLORCLUT_H |
| 5 | #define QCOLORCLUT_H |
| 6 | |
| 7 | // |
| 8 | // W A R N I N G |
| 9 | // ------------- |
| 10 | // |
| 11 | // This file is not part of the Qt API. It exists purely as an |
| 12 | // implementation detail. This header file may change from version to |
| 13 | // version without notice, or even be removed. |
| 14 | // |
| 15 | // We mean it. |
| 16 | // |
| 17 | |
| 18 | #include <QtCore/qlist.h> |
| 19 | #include <QtCore/qsimd.h> |
| 20 | #include <QtGui/private/qcolormatrix_p.h> |
| 21 | #if defined(__SSE2__) |
| 22 | #include <immintrin.h> |
| 23 | #endif |
| 24 | |
| 25 | QT_BEGIN_NAMESPACE |
| 26 | |
| 27 | // A 3/4-dimensional lookup table compatible with ICC lut8, lut16, mAB, and mBA formats. |
| 28 | class QColorCLUT |
| 29 | { |
| 30 | inline static QColorVector interpolate(const QColorVector &a, const QColorVector &b, float t) |
| 31 | { |
| 32 | return a + (b - a) * t; // faster than std::lerp by assuming no super large or non-number floats |
| 33 | } |
| 34 | inline static void interpolateIn(QColorVector &a, const QColorVector &b, float t) |
| 35 | { |
| 36 | a += (b - a) * t; |
| 37 | } |
| 38 | public: |
| 39 | uint32_t gridPointsX = 0; |
| 40 | uint32_t gridPointsY = 0; |
| 41 | uint32_t gridPointsZ = 0; |
| 42 | uint32_t gridPointsW = 1; |
| 43 | QList<QColorVector> table; |
| 44 | |
| 45 | bool isEmpty() const { return table.isEmpty(); } |
| 46 | |
| 47 | QColorVector apply(const QColorVector &v) const |
| 48 | { |
| 49 | Q_ASSERT(table.size() == qsizetype(gridPointsX * gridPointsY * gridPointsZ * gridPointsW)); |
| 50 | QColorVector frac; |
| 51 | #if defined(__SSE2__) |
| 52 | const __m128 minV = _mm_setzero_ps(); |
| 53 | const __m128 maxV = _mm_set1_ps(w: 1.0f); |
| 54 | const __m128i gridPointsInt = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(&gridPointsX)); |
| 55 | const __m128 gridPointsV = _mm_cvtepi32_ps(a: _mm_add_epi32(a: gridPointsInt, b: _mm_set1_epi32(i: -1))); |
| 56 | __m128 c = _mm_loadu_ps(p: &v.x); |
| 57 | c = _mm_max_ps(a: c, b: minV); |
| 58 | c = _mm_min_ps(a: c, b: maxV); |
| 59 | c = _mm_mul_ps(a: c, b: gridPointsV); |
| 60 | #if !defined(__SSE4_1__) |
| 61 | const __m128 clo = _mm_cvtepi32_ps(a: _mm_cvttps_epi32(a: c)); // truncation == floor for x >= 0 |
| 62 | #else |
| 63 | const __m128 clo = _mm_floor_ps(c); |
| 64 | #endif |
| 65 | __m128 chi = _mm_add_ps(a: clo, b: maxV); |
| 66 | chi = _mm_min_ps(a: chi, b: gridPointsV); |
| 67 | _mm_storeu_ps(p: reinterpret_cast<float *>(&frac), a: _mm_sub_ps(a: c, b: clo)); |
| 68 | const __m128i ilo = _mm_cvtps_epi32(a: clo); |
| 69 | const __m128i ihi = _mm_cvtps_epi32(a: chi); |
| 70 | const uint32_t lox = _mm_cvtsi128_si32(a: ilo); |
| 71 | const uint32_t hix = _mm_cvtsi128_si32(a: ihi); |
| 72 | #if !defined(__SSE4_1__) |
| 73 | const uint32_t loy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(1, 1, 1, 1))); |
| 74 | const uint32_t loz = _mm_cvtsi128_si32(a: _mm_unpackhi_epi32(a: ilo, b: ilo)); |
| 75 | const uint32_t low = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(3, 3, 3, 3))); |
| 76 | const uint32_t hiy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(1, 1, 1, 1))); |
| 77 | const uint32_t hiz = _mm_cvtsi128_si32(a: _mm_unpackhi_epi32(a: ihi, b: ihi)); |
| 78 | const uint32_t hiw = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(3, 3, 3, 3))); |
| 79 | #else |
| 80 | const uint32_t loy = _mm_extract_epi32(ilo, 1); |
| 81 | const uint32_t loz = _mm_extract_epi32(ilo, 2); |
| 82 | const uint32_t low = _mm_extract_epi32(ilo, 3); |
| 83 | const uint32_t hiy = _mm_extract_epi32(ihi, 1); |
| 84 | const uint32_t hiz = _mm_extract_epi32(ihi, 2); |
| 85 | const uint32_t hiw = _mm_extract_epi32(ihi, 3); |
| 86 | #endif |
| 87 | #else |
| 88 | const float x = std::clamp(v.x, 0.0f, 1.0f) * (gridPointsX - 1); |
| 89 | const float y = std::clamp(v.y, 0.0f, 1.0f) * (gridPointsY - 1); |
| 90 | const float z = std::clamp(v.z, 0.0f, 1.0f) * (gridPointsZ - 1); |
| 91 | const float w = std::clamp(v.w, 0.0f, 1.0f) * (gridPointsW - 1); |
| 92 | const uint32_t lox = static_cast<uint32_t>(std::floor(x)); |
| 93 | const uint32_t hix = std::min(lox + 1, gridPointsX - 1); |
| 94 | const uint32_t loy = static_cast<uint32_t>(std::floor(y)); |
| 95 | const uint32_t hiy = std::min(loy + 1, gridPointsY - 1); |
| 96 | const uint32_t loz = static_cast<uint32_t>(std::floor(z)); |
| 97 | const uint32_t hiz = std::min(loz + 1, gridPointsZ - 1); |
| 98 | const uint32_t low = static_cast<uint32_t>(std::floor(w)); |
| 99 | const uint32_t hiw = std::min(low + 1, gridPointsW - 1); |
| 100 | frac.x = x - static_cast<float>(lox); |
| 101 | frac.y = y - static_cast<float>(loy); |
| 102 | frac.z = z - static_cast<float>(loz); |
| 103 | frac.w = w - static_cast<float>(low); |
| 104 | #endif |
| 105 | if (gridPointsW > 1) { |
| 106 | auto index = [&](qsizetype x, qsizetype y, qsizetype z, qsizetype w) -> qsizetype { |
| 107 | return x * gridPointsW * gridPointsZ * gridPointsY |
| 108 | + y * gridPointsW * gridPointsZ |
| 109 | + z * gridPointsW |
| 110 | + w; |
| 111 | }; |
| 112 | QColorVector tmp[8]; |
| 113 | // interpolate over w |
| 114 | tmp[0] = interpolate(a: table[index(lox, loy, loz, low)], |
| 115 | b: table[index(lox, loy, loz, hiw)], t: frac.w); |
| 116 | tmp[1] = interpolate(a: table[index(lox, loy, hiz, low)], |
| 117 | b: table[index(lox, loy, hiz, hiw)], t: frac.w); |
| 118 | tmp[2] = interpolate(a: table[index(lox, hiy, loz, low)], |
| 119 | b: table[index(lox, hiy, loz, hiw)], t: frac.w); |
| 120 | tmp[3] = interpolate(a: table[index(lox, hiy, hiz, low)], |
| 121 | b: table[index(lox, hiy, hiz, hiw)], t: frac.w); |
| 122 | tmp[4] = interpolate(a: table[index(hix, loy, loz, low)], |
| 123 | b: table[index(hix, loy, loz, hiw)], t: frac.w); |
| 124 | tmp[5] = interpolate(a: table[index(hix, loy, hiz, low)], |
| 125 | b: table[index(hix, loy, hiz, hiw)], t: frac.w); |
| 126 | tmp[6] = interpolate(a: table[index(hix, hiy, loz, low)], |
| 127 | b: table[index(hix, hiy, loz, hiw)], t: frac.w); |
| 128 | tmp[7] = interpolate(a: table[index(hix, hiy, hiz, low)], |
| 129 | b: table[index(hix, hiy, hiz, hiw)], t: frac.w); |
| 130 | // interpolate over z |
| 131 | for (int i = 0; i < 4; ++i) |
| 132 | interpolateIn(a&: tmp[i * 2], b: tmp[i * 2 + 1], t: frac.z); |
| 133 | // interpolate over y |
| 134 | for (int i = 0; i < 2; ++i) |
| 135 | interpolateIn(a&: tmp[i * 4], b: tmp[i * 4 + 2], t: frac.y); |
| 136 | // interpolate over x |
| 137 | interpolateIn(a&: tmp[0], b: tmp[4], t: frac.x); |
| 138 | return tmp[0]; |
| 139 | } |
| 140 | auto index = [&](qsizetype x, qsizetype y, qsizetype z) -> qsizetype { |
| 141 | return x * gridPointsZ * gridPointsY |
| 142 | + y * gridPointsZ |
| 143 | + z; |
| 144 | }; |
| 145 | QColorVector tmp[8] = { |
| 146 | table[index(lox, loy, loz)], |
| 147 | table[index(lox, loy, hiz)], |
| 148 | table[index(lox, hiy, loz)], |
| 149 | table[index(lox, hiy, hiz)], |
| 150 | table[index(hix, loy, loz)], |
| 151 | table[index(hix, loy, hiz)], |
| 152 | table[index(hix, hiy, loz)], |
| 153 | table[index(hix, hiy, hiz)] |
| 154 | }; |
| 155 | // interpolate over z |
| 156 | for (int i = 0; i < 4; ++i) |
| 157 | interpolateIn(a&: tmp[i * 2], b: tmp[i * 2 + 1], t: frac.z); |
| 158 | // interpolate over y |
| 159 | for (int i = 0; i < 2; ++i) |
| 160 | interpolateIn(a&: tmp[i * 4], b: tmp[i * 4 + 2], t: frac.y); |
| 161 | // interpolate over x |
| 162 | interpolateIn(a&: tmp[0], b: tmp[4], t: frac.x); |
| 163 | return tmp[0]; |
| 164 | } |
| 165 | }; |
| 166 | |
| 167 | QT_END_NAMESPACE |
| 168 | |
| 169 | #endif // QCOLORCLUT_H |
| 170 | |