1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qvideoframeconversionhelper_p.h" |
5 | |
6 | #ifdef QT_COMPILER_SUPPORTS_SSSE3 |
7 | |
8 | QT_BEGIN_NAMESPACE |
9 | |
10 | namespace { |
11 | |
12 | template<int a, int r, int g, int b> |
13 | void convert_to_ARGB32_ssse3(const QVideoFrame &frame, uchar *output) |
14 | { |
15 | FETCH_INFO_PACKED(frame) |
16 | MERGE_LOOPS(width, height, stride, 4) |
17 | quint32 *argb = reinterpret_cast<quint32*>(output); |
18 | |
19 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
20 | __m128i shuffleMask = _mm_set_epi8(b15: 12 + a, b14: 12 + r, b13: 12 + g, b12: 12 + b, |
21 | b11: 8 + a, b10: 8 + r, b9: 8 + g, b8: 8 + b, |
22 | b7: 4 + a, b6: 4 + r, b5: 4 + g, b4: 4 + b, |
23 | b3: 0 + a, b2: 0 + r, b1: 0 + g, b0: 0 + b); |
24 | #else |
25 | __m128i shuffleMask = _mm_set_epi8(15 - a, 15 - r, 15 - g, 15 - b, |
26 | 11 - a, 11 - r, 11 - g, 11 - b, |
27 | 7 - a, 7 - r, 7 - g, 7 - b, |
28 | 3 - a, 3 - r, 3 - g, 3 - b); |
29 | #endif |
30 | |
31 | using Pixel = const ArgbPixel<a, r, g, b>; |
32 | |
33 | for (int y = 0; y < height; ++y) { |
34 | const auto *pixel = reinterpret_cast<const Pixel *>(src); |
35 | |
36 | int x = 0; |
37 | ALIGN(16, argb, x, width) { |
38 | *argb = pixel->convert(); |
39 | ++pixel; |
40 | ++argb; |
41 | } |
42 | |
43 | for (; x < width - 7; x += 8) { |
44 | __m128i pixelData = _mm_loadu_si128(p: reinterpret_cast<const __m128i*>(pixel)); |
45 | __m128i pixelData2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i*>(pixel + 4)); |
46 | pixel += 8; |
47 | pixelData = _mm_shuffle_epi8(a: pixelData, b: shuffleMask); |
48 | pixelData2 = _mm_shuffle_epi8(a: pixelData2, b: shuffleMask); |
49 | _mm_store_si128(p: reinterpret_cast<__m128i*>(argb), b: pixelData); |
50 | _mm_store_si128(p: reinterpret_cast<__m128i*>(argb + 4), b: pixelData2); |
51 | argb += 8; |
52 | } |
53 | |
54 | // leftovers |
55 | for (; x < width; ++x) { |
56 | *argb = pixel->convert(); |
57 | ++pixel; |
58 | ++argb; |
59 | } |
60 | |
61 | src += stride; |
62 | } |
63 | } |
64 | |
65 | } |
66 | |
67 | void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_ssse3(const QVideoFrame &frame, uchar *output) |
68 | { |
69 | convert_to_ARGB32_ssse3<0, 1, 2, 3>(frame, output); |
70 | } |
71 | |
72 | void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_ssse3(const QVideoFrame &frame, uchar *output) |
73 | { |
74 | convert_to_ARGB32_ssse3<0, 3, 2, 1>(frame, output); |
75 | } |
76 | |
77 | void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_ssse3(const QVideoFrame &frame, uchar *output) |
78 | { |
79 | convert_to_ARGB32_ssse3<3, 0, 1, 2>(frame, output); |
80 | } |
81 | |
82 | void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_ssse3(const QVideoFrame &frame, uchar *output) |
83 | { |
84 | convert_to_ARGB32_ssse3<3, 2, 1, 0>(frame, output); |
85 | } |
86 | |
87 | QT_END_NAMESPACE |
88 | |
89 | #endif |
90 | |