| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qvideoframeconversionhelper_p.h" |
| 5 | |
| 6 | #ifdef QT_COMPILER_SUPPORTS_AVX2 |
| 7 | |
| 8 | QT_BEGIN_NAMESPACE |
| 9 | |
| 10 | namespace { |
| 11 | |
| 12 | template<int a, int r, int g, int b> |
| 13 | void convert_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output) |
| 14 | { |
| 15 | FETCH_INFO_PACKED(frame) |
| 16 | MERGE_LOOPS(width, height, stride, 4) |
| 17 | quint32 *argb = reinterpret_cast<quint32*>(output); |
| 18 | |
| 19 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 20 | __m256i shuffleMask = _mm256_set_epi8(b31: 12 + a, b30: 12 + r, b29: 12 + g, b28: 12 + b, |
| 21 | b27: 8 + a, b26: 8 + r, b25: 8 + g, b24: 8 + b, |
| 22 | b23: 4 + a, b22: 4 + r, b21: 4 + g, b20: 4 + b, |
| 23 | b19: 0 + a, b18: 0 + r, b17: 0 + g, b16: 0 + b, |
| 24 | b15: 12 + a, b14: 12 + r, b13: 12 + g, b12: 12 + b, |
| 25 | b11: 8 + a, b10: 8 + r, b09: 8 + g, b08: 8 + b, |
| 26 | b07: 4 + a, b06: 4 + r, b05: 4 + g, b04: 4 + b, |
| 27 | b03: 0 + a, b02: 0 + r, b01: 0 + g, b00: 0 + b); |
| 28 | #else |
| 29 | __m256i shuffleMask = _mm256_set_epi8(15 - a, 15 - r, 15 - g, 15 - b, |
| 30 | 11 - a, 11 - r, 11 - g, 11 - b, |
| 31 | 7 - a, 7 - r, 7 - g, 7 - b, |
| 32 | 3 - a, 3 - r, 3 - g, 3 - b, |
| 33 | 15 - a, 15 - r, 15 - g, 15 - b, |
| 34 | 11 - a, 11 - r, 11 - g, 11 - b, |
| 35 | 7 - a, 7 - r, 7 - g, 7 - b, |
| 36 | 3 - a, 3 - r, 3 - g, 3 - b); |
| 37 | #endif |
| 38 | |
| 39 | using Pixel = const ArgbPixel<a, r, g, b>; |
| 40 | |
| 41 | for (int y = 0; y < height; ++y) { |
| 42 | auto *pixel = reinterpret_cast<const Pixel *>(src); |
| 43 | |
| 44 | int x = 0; |
| 45 | QT_MEDIA_ALIGN(32, argb, x, width) { |
| 46 | *argb = pixel->convert(); |
| 47 | ++pixel; |
| 48 | ++argb; |
| 49 | } |
| 50 | |
| 51 | for (; x < width - 15; x += 16) { |
| 52 | __m256i pixelData = _mm256_loadu_si256(p: reinterpret_cast<const __m256i*>(pixel)); |
| 53 | __m256i pixelData2 = _mm256_loadu_si256(p: reinterpret_cast<const __m256i*>(pixel + 8)); |
| 54 | pixel += 16; |
| 55 | pixelData = _mm256_shuffle_epi8(a: pixelData, b: shuffleMask); |
| 56 | pixelData2 = _mm256_shuffle_epi8(a: pixelData2, b: shuffleMask); |
| 57 | _mm256_store_si256(p: reinterpret_cast<__m256i*>(argb), a: pixelData); |
| 58 | _mm256_store_si256(p: reinterpret_cast<__m256i*>(argb + 8), a: pixelData2); |
| 59 | argb += 16; |
| 60 | } |
| 61 | |
| 62 | // leftovers |
| 63 | for (; x < width; ++x) { |
| 64 | *argb = pixel->convert(); |
| 65 | ++pixel; |
| 66 | ++argb; |
| 67 | } |
| 68 | |
| 69 | src += stride; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | } |
| 74 | |
| 75 | |
// ARGB8888 source layout: byte 0 = A, 1 = R, 2 = G, 3 = B.
void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output)
{
    convert_to_ARGB32_avx2<0, 1, 2, 3>(frame, output);
}
| 80 | |
// ABGR8888 source layout: byte 0 = A, 1 = B, 2 = G, 3 = R.
void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output)
{
    convert_to_ARGB32_avx2<0, 3, 2, 1>(frame, output);
}
| 85 | |
// RGBA8888 source layout: byte 0 = R, 1 = G, 2 = B, 3 = A.
void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output)
{
    convert_to_ARGB32_avx2<3, 0, 1, 2>(frame, output);
}
| 90 | |
// BGRA8888 source layout: byte 0 = B, 1 = G, 2 = R, 3 = A.
void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output)
{
    convert_to_ARGB32_avx2<3, 2, 1, 0>(frame, output);
}
| 95 | |
| 96 | void QT_FASTCALL qt_copy_pixels_with_mask_avx2(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask) |
| 97 | { |
| 98 | const auto mask256 = _mm256_set_epi32(i0: mask, i1: mask, i2: mask, i3: mask, i4: mask, i5: mask, i6: mask, i7: mask); |
| 99 | |
| 100 | size_t x = 0; |
| 101 | |
| 102 | QT_MEDIA_ALIGN(32, dst, x, size) |
| 103 | *(dst++) = *(src++) | mask; |
| 104 | |
| 105 | for (; x < size - (8 * 4 + 1); x += 8 * 4) { |
| 106 | const auto srcData1 = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src)); |
| 107 | const auto srcData2 = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src += 8)); |
| 108 | const auto srcData3 = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src += 8)); |
| 109 | const auto srcData4 = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src += 8)); |
| 110 | |
| 111 | _mm256_store_si256(p: reinterpret_cast<__m256i *>(dst), a: _mm256_or_si256(a: srcData1, b: mask256)); |
| 112 | _mm256_store_si256(p: reinterpret_cast<__m256i *>(dst += 8), a: _mm256_or_si256(a: srcData2, b: mask256)); |
| 113 | _mm256_store_si256(p: reinterpret_cast<__m256i *>(dst += 8), a: _mm256_or_si256(a: srcData3, b: mask256)); |
| 114 | _mm256_store_si256(p: reinterpret_cast<__m256i *>(dst += 8), a: _mm256_or_si256(a: srcData4, b: mask256)); |
| 115 | |
| 116 | src += 8; |
| 117 | dst += 8; |
| 118 | } |
| 119 | |
| 120 | // leftovers |
| 121 | for (; x < size - 7; x += 8) { |
| 122 | const auto srcData = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src)); |
| 123 | _mm256_store_si256(p: reinterpret_cast<__m256i *>(dst), a: _mm256_or_si256(a: srcData, b: mask256)); |
| 124 | |
| 125 | src += 8; |
| 126 | dst += 8; |
| 127 | } |
| 128 | |
| 129 | for (; x < size; ++x) |
| 130 | *(dst++) = *(src++) | mask; |
| 131 | } |
| 132 | |
| 133 | QT_END_NAMESPACE |
| 134 | |
| 135 | #endif |
| 136 | |