| 1 | // Copyright (C) 2016 The Qt Company Ltd. | 
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only | 
| 3 |  | 
| 4 | #include "qvideoframeconversionhelper_p.h" | 
| 5 |  | 
| 6 | #ifdef QT_COMPILER_SUPPORTS_SSE2 | 
| 7 |  | 
| 8 | QT_BEGIN_NAMESPACE | 
| 9 |  | 
| 10 | namespace  { | 
| 11 |  | 
| 12 | template<int a, int r, int b, int g> | 
| 13 | void convert_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output) | 
| 14 | { | 
| 15 |     FETCH_INFO_PACKED(frame) | 
| 16 |     MERGE_LOOPS(width, height, stride, 4) | 
| 17 |     quint32 *argb = reinterpret_cast<quint32*>(output); | 
| 18 |  | 
| 19 |     const __m128i zero = _mm_setzero_si128(); | 
| 20 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN | 
| 21 |     const uchar shuffle = _MM_SHUFFLE(a, r, b, g); | 
| 22 | #else | 
| 23 |     const uchar shuffle = _MM_SHUFFLE(3-a, 3-r, 3-b, 3-g); | 
| 24 | #endif | 
| 25 |  | 
| 26 |     using Pixel = const ArgbPixel<a, r, g, b>; | 
| 27 |  | 
| 28 |     for (int y = 0; y < height; ++y) { | 
| 29 |         auto *pixel = reinterpret_cast<const Pixel *>(src); | 
| 30 |  | 
| 31 |         int x = 0; | 
| 32 |         QT_MEDIA_ALIGN(16, argb, x, width) { | 
| 33 |             *argb = pixel->convert(); | 
| 34 |             ++pixel; | 
| 35 |             ++argb; | 
| 36 |         } | 
| 37 |  | 
| 38 |         for (; x < width - 3; x += 4) { | 
| 39 |             __m128i pixelData = _mm_loadu_si128(p: reinterpret_cast<const __m128i*>(pixel)); | 
| 40 |             pixel += 4; | 
| 41 |             __m128i lowPixels = _mm_unpacklo_epi8(a: pixelData, b: zero); | 
| 42 |             __m128i highPixels = _mm_unpackhi_epi8(a: pixelData, b: zero); | 
| 43 |             lowPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(lowPixels, shuffle), shuffle); | 
| 44 |             highPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(highPixels, shuffle), shuffle); | 
| 45 |             pixelData = _mm_packus_epi16(a: lowPixels, b: highPixels); | 
| 46 |             _mm_store_si128(p: reinterpret_cast<__m128i*>(argb), b: pixelData); | 
| 47 |             argb += 4; | 
| 48 |         } | 
| 49 |  | 
| 50 |         // leftovers | 
| 51 |         for (; x < width; ++x) { | 
| 52 |             *argb = pixel->convert(); | 
| 53 |             ++pixel; | 
| 54 |             ++argb; | 
| 55 |         } | 
| 56 |  | 
| 57 |         src += stride; | 
| 58 |     } | 
| 59 | } | 
| 60 |  | 
| 61 | } | 
| 62 |  | 
| 63 | void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output) | 
| 64 | { | 
| 65 |     convert_to_ARGB32_sse2<0, 1, 2, 3>(frame, output); | 
| 66 | } | 
| 67 |  | 
| 68 | void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output) | 
| 69 | { | 
| 70 |     convert_to_ARGB32_sse2<0, 3, 2, 1>(frame, output); | 
| 71 | } | 
| 72 |  | 
| 73 | void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output) | 
| 74 | { | 
| 75 |     convert_to_ARGB32_sse2<3, 0, 1, 2>(frame, output); | 
| 76 | } | 
| 77 |  | 
| 78 | void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output) | 
| 79 | { | 
| 80 |     convert_to_ARGB32_sse2<3, 2, 1, 0>(frame, output); | 
| 81 | } | 
| 82 |  | 
// Copies size 32-bit pixels from src to dst, OR-ing mask into every pixel.
//
// dst     destination pixels; receives exactly size values
// src     source pixels; read with unaligned loads
// size    number of pixels to copy; any value, including 0, is valid
// mask    bits to set in every copied pixel
//
// Pixels are written one at a time until dst allows aligned 16-byte stores
// (presumably what QT_MEDIA_ALIGN iterates for), then in batches of 16 and 4
// with SSE2, and finally one at a time for whatever is left.
void QT_FASTCALL qt_copy_pixels_with_mask_sse2(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask)
{
    const auto mask128 = _mm_set1_epi32(static_cast<int>(mask));

    size_t x = 0;

    QT_MEDIA_ALIGN(16, dst, x, size)
        *(dst++) = *(src++) | mask;

    // The loop bounds compare the number of remaining pixels (x never exceeds
    // size) instead of subtracting from size: size is unsigned, so an
    // expression like "size - 15" wraps around for short buffers and would
    // let the vector loops run past the end of both arrays.
    for (; size - x >= 16; x += 16) {
        const auto srcData0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
        const auto srcData1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + 4));
        const auto srcData2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + 8));
        const auto srcData3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + 12));

        _mm_store_si128(reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData0, mask128));
        _mm_store_si128(reinterpret_cast<__m128i *>(dst + 4), _mm_or_si128(srcData1, mask128));
        _mm_store_si128(reinterpret_cast<__m128i *>(dst + 8), _mm_or_si128(srcData2, mask128));
        _mm_store_si128(reinterpret_cast<__m128i *>(dst + 12), _mm_or_si128(srcData3, mask128));

        src += 16;
        dst += 16;
    }

    for (; size - x >= 4; x += 4) {
        const auto srcData = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));

        _mm_store_si128(reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData, mask128));

        src += 4;
        dst += 4;
    }

    // leftovers: fewer than four pixels remain
    for (; x < size; ++x)
        *(dst++) = *(src++) | mask;
}
| 120 |  | 
| 121 | QT_END_NAMESPACE | 
| 122 |  | 
| 123 | #endif | 
| 124 |  |