1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qvideoframeconversionhelper_p.h"
5
6#ifdef QT_COMPILER_SUPPORTS_SSE2
7
8QT_BEGIN_NAMESPACE
9
10namespace {
11
12template<int a, int r, int b, int g>
13void convert_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output)
14{
15 FETCH_INFO_PACKED(frame)
16 MERGE_LOOPS(width, height, stride, 4)
17 quint32 *argb = reinterpret_cast<quint32*>(output);
18
19 const __m128i zero = _mm_setzero_si128();
20#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
21 const uchar shuffle = _MM_SHUFFLE(a, r, b, g);
22#else
23 const uchar shuffle = _MM_SHUFFLE(3-a, 3-r, 3-b, 3-g);
24#endif
25
26 using Pixel = const ArgbPixel<a, r, g, b>;
27
28 for (int y = 0; y < height; ++y) {
29 auto *pixel = reinterpret_cast<const Pixel *>(src);
30
31 int x = 0;
32 ALIGN(16, argb, x, width) {
33 *argb = pixel->convert();
34 ++pixel;
35 ++argb;
36 }
37
38 for (; x < width - 3; x += 4) {
39 __m128i pixelData = _mm_loadu_si128(p: reinterpret_cast<const __m128i*>(pixel));
40 pixel += 4;
41 __m128i lowPixels = _mm_unpacklo_epi8(a: pixelData, b: zero);
42 __m128i highPixels = _mm_unpackhi_epi8(a: pixelData, b: zero);
43 lowPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(lowPixels, shuffle), shuffle);
44 highPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(highPixels, shuffle), shuffle);
45 pixelData = _mm_packus_epi16(a: lowPixels, b: highPixels);
46 _mm_store_si128(p: reinterpret_cast<__m128i*>(argb), b: pixelData);
47 argb += 4;
48 }
49
50 // leftovers
51 for (; x < width; ++x) {
52 *argb = pixel->convert();
53 ++pixel;
54 ++argb;
55 }
56
57 src += stride;
58 }
59}
60
61}
62
63void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output)
64{
65 convert_to_ARGB32_sse2<0, 1, 2, 3>(frame, output);
66}
67
68void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output)
69{
70 convert_to_ARGB32_sse2<0, 3, 2, 1>(frame, output);
71}
72
73void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output)
74{
75 convert_to_ARGB32_sse2<3, 0, 1, 2>(frame, output);
76}
77
78void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output)
79{
80 convert_to_ARGB32_sse2<3, 2, 1, 0>(frame, output);
81}
82
83void QT_FASTCALL qt_copy_pixels_with_mask_sse2(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask)
84{
85 const auto mask128 = _mm_set_epi32(i3: mask, i2: mask, i1: mask, i0: mask);
86
87 size_t x = 0;
88
89 ALIGN(16, dst, x, size)
90 *(dst++) = *(src++) | mask;
91
92 for (; x < size - (4 * 4 - 1); x += 4 * 4) {
93 const auto srcData0 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src));
94 const auto srcData1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src += 4));
95 const auto srcData2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src += 4));
96 const auto srcData3 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src += 4));
97
98 _mm_store_si128(p: reinterpret_cast<__m128i *>(dst), b: _mm_or_si128(a: srcData0, b: mask128));
99 _mm_store_si128(p: reinterpret_cast<__m128i *>(dst += 4), b: _mm_or_si128(a: srcData1, b: mask128));
100 _mm_store_si128(p: reinterpret_cast<__m128i *>(dst += 4), b: _mm_or_si128(a: srcData2, b: mask128));
101 _mm_store_si128(p: reinterpret_cast<__m128i *>(dst += 4), b: _mm_or_si128(a: srcData3, b: mask128));
102
103 src += 4;
104 dst += 4;
105 }
106
107 for (; x < size - 3; x += 4) {
108 const auto srcData = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src));
109
110 _mm_store_si128(p: reinterpret_cast<__m128i *>(dst), b: _mm_or_si128(a: srcData, b: mask128));
111
112 src += 4;
113 dst += 4;
114 }
115
116 // leftovers
117 for (; x < size; ++x)
118 *(dst++) = *(src++) | mask;
119}
120
121QT_END_NAMESPACE
122
123#endif
124

// Source: qtmultimedia/src/multimedia/video/qvideoframeconversionhelper_sse2.cpp