1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qmemrotate_p.h" |
5 | #include "qpixellayout_p.h" |
6 | |
7 | QT_BEGIN_NAMESPACE |
8 | |
// Edge length, in pixels, of the square tiles the 90/270 degree rotations work through.
static constexpr int tileSize = 32;
10 | |
11 | template<class T> |
12 | static inline void qt_memrotate90_tiled(const T *src, int w, int h, int isstride, T *dest, int idstride) |
13 | { |
14 | const qsizetype sstride = isstride / sizeof(T); |
15 | const qsizetype dstride = idstride / sizeof(T); |
16 | |
17 | const int pack = sizeof(quint32) / sizeof(T); |
18 | const int unaligned = |
19 | qMin(a: uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), b: uint(h)); |
20 | const int restX = w % tileSize; |
21 | const int restY = (h - unaligned) % tileSize; |
22 | const int unoptimizedY = restY % pack; |
23 | const int numTilesX = w / tileSize + (restX > 0); |
24 | const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); |
25 | |
26 | for (int tx = 0; tx < numTilesX; ++tx) { |
27 | const int startx = w - tx * tileSize - 1; |
28 | const int stopx = qMax(a: startx - tileSize, b: 0); |
29 | |
30 | if (unaligned) { |
31 | for (int x = startx; x >= stopx; --x) { |
32 | T *d = dest + (w - x - 1) * dstride; |
33 | for (int y = 0; y < unaligned; ++y) { |
34 | *d++ = src[y * sstride + x]; |
35 | } |
36 | } |
37 | } |
38 | |
39 | for (int ty = 0; ty < numTilesY; ++ty) { |
40 | const int starty = ty * tileSize + unaligned; |
41 | const int stopy = qMin(a: starty + tileSize, b: h - unoptimizedY); |
42 | |
43 | for (int x = startx; x >= stopx; --x) { |
44 | quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride + starty); |
45 | for (int y = starty; y < stopy; y += pack) { |
46 | quint32 c = src[y * sstride + x]; |
47 | for (int i = 1; i < pack; ++i) { |
48 | const int shift = (sizeof(T) * 8 * i); |
49 | const T color = src[(y + i) * sstride + x]; |
50 | c |= color << shift; |
51 | } |
52 | *d++ = c; |
53 | } |
54 | } |
55 | } |
56 | |
57 | if (unoptimizedY) { |
58 | const int starty = h - unoptimizedY; |
59 | for (int x = startx; x >= stopx; --x) { |
60 | T *d = dest + (w - x - 1) * dstride + starty; |
61 | for (int y = starty; y < h; ++y) { |
62 | *d++ = src[y * sstride + x]; |
63 | } |
64 | } |
65 | } |
66 | } |
67 | } |
68 | |
69 | template<class T> |
70 | static inline void qt_memrotate90_tiled_unpacked(const T *src, int w, int h, int isstride, T *dest, int idstride) |
71 | { |
72 | const qsizetype sstride = isstride; |
73 | const qsizetype dstride = idstride; |
74 | const int numTilesX = (w + tileSize - 1) / tileSize; |
75 | const int numTilesY = (h + tileSize - 1) / tileSize; |
76 | |
77 | for (int tx = 0; tx < numTilesX; ++tx) { |
78 | const int startx = w - tx * tileSize - 1; |
79 | const int stopx = qMax(a: startx - tileSize, b: 0); |
80 | |
81 | for (int ty = 0; ty < numTilesY; ++ty) { |
82 | const int starty = ty * tileSize; |
83 | const int stopy = qMin(a: starty + tileSize, b: h); |
84 | |
85 | for (int x = startx; x >= stopx; --x) { |
86 | T *d = (T *)((char*)dest + (w - x - 1) * dstride) + starty; |
87 | const char *s = (const char*)(src + x) + starty * sstride; |
88 | for (int y = starty; y < stopy; ++y) { |
89 | *d++ = *(const T *)(s); |
90 | s += sstride; |
91 | } |
92 | } |
93 | } |
94 | } |
95 | } |
96 | |
97 | template<class T> |
98 | static inline void qt_memrotate270_tiled(const T *src, int w, int h, int isstride, T *dest, int idstride) |
99 | { |
100 | const qsizetype sstride = isstride / sizeof(T); |
101 | const qsizetype dstride = idstride / sizeof(T); |
102 | |
103 | const int pack = sizeof(quint32) / sizeof(T); |
104 | const int unaligned = |
105 | qMin(a: uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), b: uint(h)); |
106 | const int restX = w % tileSize; |
107 | const int restY = (h - unaligned) % tileSize; |
108 | const int unoptimizedY = restY % pack; |
109 | const int numTilesX = w / tileSize + (restX > 0); |
110 | const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); |
111 | |
112 | for (int tx = 0; tx < numTilesX; ++tx) { |
113 | const int startx = tx * tileSize; |
114 | const int stopx = qMin(a: startx + tileSize, b: w); |
115 | |
116 | if (unaligned) { |
117 | for (int x = startx; x < stopx; ++x) { |
118 | T *d = dest + x * dstride; |
119 | for (int y = h - 1; y >= h - unaligned; --y) { |
120 | *d++ = src[y * sstride + x]; |
121 | } |
122 | } |
123 | } |
124 | |
125 | for (int ty = 0; ty < numTilesY; ++ty) { |
126 | const int starty = h - 1 - unaligned - ty * tileSize; |
127 | const int stopy = qMax(a: starty - tileSize, b: unoptimizedY); |
128 | |
129 | for (int x = startx; x < stopx; ++x) { |
130 | quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride |
131 | + h - 1 - starty); |
132 | for (int y = starty; y >= stopy; y -= pack) { |
133 | quint32 c = src[y * sstride + x]; |
134 | for (int i = 1; i < pack; ++i) { |
135 | const int shift = (sizeof(T) * 8 * i); |
136 | const T color = src[(y - i) * sstride + x]; |
137 | c |= color << shift; |
138 | } |
139 | *d++ = c; |
140 | } |
141 | } |
142 | } |
143 | if (unoptimizedY) { |
144 | const int starty = unoptimizedY - 1; |
145 | for (int x = startx; x < stopx; ++x) { |
146 | T *d = dest + x * dstride + h - 1 - starty; |
147 | for (int y = starty; y >= 0; --y) { |
148 | *d++ = src[y * sstride + x]; |
149 | } |
150 | } |
151 | } |
152 | } |
153 | } |
154 | |
155 | template<class T> |
156 | static inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int isstride, T *dest, int idstride) |
157 | { |
158 | const qsizetype sstride = isstride; |
159 | const qsizetype dstride = idstride; |
160 | const int numTilesX = (w + tileSize - 1) / tileSize; |
161 | const int numTilesY = (h + tileSize - 1) / tileSize; |
162 | |
163 | for (int tx = 0; tx < numTilesX; ++tx) { |
164 | const int startx = tx * tileSize; |
165 | const int stopx = qMin(a: startx + tileSize, b: w); |
166 | |
167 | for (int ty = 0; ty < numTilesY; ++ty) { |
168 | const int starty = h - 1 - ty * tileSize; |
169 | const int stopy = qMax(a: starty - tileSize, b: 0); |
170 | |
171 | for (int x = startx; x < stopx; ++x) { |
172 | T *d = (T*)((char*)dest + x * dstride) + h - 1 - starty; |
173 | const char *s = (const char*)(src + x) + starty * sstride; |
174 | for (int y = starty; y >= stopy; --y) { |
175 | *d++ = *(const T*)s; |
176 | s -= sstride; |
177 | } |
178 | } |
179 | } |
180 | } |
181 | } |
182 | |
183 | |
184 | template <class T> |
185 | static |
186 | inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride, |
187 | T *dest, int dstStride) |
188 | { |
189 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
190 | // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer |
191 | static_assert(sizeof(quint32) % sizeof(T) == 0); |
192 | qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); |
193 | #else |
194 | qt_memrotate90_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); |
195 | #endif |
196 | } |
197 | |
198 | template<class T> |
199 | static inline void qt_memrotate180_template(const T *src, int w, int h, int isstride, T *dest, int idstride) |
200 | { |
201 | const qsizetype sstride = isstride; |
202 | const qsizetype dstride = idstride; |
203 | |
204 | const char *s = (const char*)(src) + (h - 1) * sstride; |
205 | for (int dy = 0; dy < h; ++dy) { |
206 | T *d = reinterpret_cast<T*>((char *)(dest) + dy * dstride); |
207 | src = reinterpret_cast<const T*>(s); |
208 | for (int dx = 0; dx < w; ++dx) { |
209 | d[dx] = src[w - 1 - dx]; |
210 | } |
211 | s -= sstride; |
212 | } |
213 | } |
214 | |
215 | template <class T> |
216 | static |
217 | inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride, |
218 | T *dest, int dstStride) |
219 | { |
220 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
221 | // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer |
222 | static_assert(sizeof(quint32) % sizeof(T) == 0); |
223 | qt_memrotate270_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); |
224 | #else |
225 | qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); |
226 | #endif |
227 | } |
228 | |
// Defines the exported qt_memrotate90/180/270 overloads for `type`, routing the
// 90 and 270 degree cases through the byte-order dispatching templates.
#define QT_IMPL_MEMROTATE(type)                                                 \
    Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h,             \
                                     int sstride, type *dest, int dstride)      \
    {                                                                           \
        qt_memrotate90_template<type>(src, w, h, sstride, dest, dstride);       \
    }                                                                           \
    Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h,            \
                                      int sstride, type *dest, int dstride)     \
    {                                                                           \
        qt_memrotate180_template<type>(src, w, h, sstride, dest, dstride);      \
    }                                                                           \
    Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h,            \
                                      int sstride, type *dest, int dstride)     \
    {                                                                           \
        qt_memrotate270_template<type>(src, w, h, sstride, dest, dstride);      \
    }
245 | |
// Defines the exported qt_memrotate90/180/270 overloads for `type`, always using
// the per-pixel (unpacked) tiled routines for the 90 and 270 degree cases.
#define QT_IMPL_SIMPLE_MEMROTATE(type)                                           \
    Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h,              \
                                     int sstride, type *dest, int dstride)       \
    {                                                                            \
        qt_memrotate90_tiled_unpacked<type>(src, w, h, sstride, dest, dstride);  \
    }                                                                            \
    Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h,             \
                                      int sstride, type *dest, int dstride)      \
    {                                                                            \
        qt_memrotate180_template<type>(src, w, h, sstride, dest, dstride);       \
    }                                                                            \
    Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h,             \
                                      int sstride, type *dest, int dstride)      \
    {                                                                            \
        qt_memrotate270_tiled_unpacked<type>(src, w, h, sstride, dest, dstride); \
    }
262 | |
263 | QT_IMPL_SIMPLE_MEMROTATE(QRgbaFloat32) |
264 | QT_IMPL_SIMPLE_MEMROTATE(quint64) |
265 | QT_IMPL_SIMPLE_MEMROTATE(quint32) |
266 | QT_IMPL_SIMPLE_MEMROTATE(quint24) |
267 | QT_IMPL_MEMROTATE(quint16) |
268 | QT_IMPL_MEMROTATE(quint8) |
269 | |
270 | void qt_memrotate90_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
271 | { |
272 | qt_memrotate90(src: srcPixels, w, h, sstride: sbpl, dest: destPixels, dstride: dbpl); |
273 | } |
274 | |
275 | void qt_memrotate180_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
276 | { |
277 | qt_memrotate180(src: srcPixels, w, h, sstride: sbpl, dest: destPixels, dstride: dbpl); |
278 | } |
279 | |
280 | void qt_memrotate270_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
281 | { |
282 | qt_memrotate270(src: srcPixels, w, h, sstride: sbpl, dest: destPixels, dstride: dbpl); |
283 | } |
284 | |
285 | void qt_memrotate90_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
286 | { |
287 | qt_memrotate90(src: (const ushort *)srcPixels, w, h, sstride: sbpl, dest: (ushort *)destPixels, dstride: dbpl); |
288 | } |
289 | |
290 | void qt_memrotate180_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
291 | { |
292 | qt_memrotate180(src: (const ushort *)srcPixels, w, h, sstride: sbpl, dest: (ushort *)destPixels, dstride: dbpl); |
293 | } |
294 | |
295 | void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
296 | { |
297 | qt_memrotate270(src: (const ushort *)srcPixels, w, h, sstride: sbpl, dest: (ushort *)destPixels, dstride: dbpl); |
298 | } |
299 | |
300 | void qt_memrotate90_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
301 | { |
302 | qt_memrotate90(src: (const quint24 *)srcPixels, w, h, sstride: sbpl, dest: (quint24 *)destPixels, dstride: dbpl); |
303 | } |
304 | |
305 | void qt_memrotate180_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
306 | { |
307 | qt_memrotate180(src: (const quint24 *)srcPixels, w, h, sstride: sbpl, dest: (quint24 *)destPixels, dstride: dbpl); |
308 | } |
309 | |
310 | void qt_memrotate270_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
311 | { |
312 | qt_memrotate270(src: (const quint24 *)srcPixels, w, h, sstride: sbpl, dest: (quint24 *)destPixels, dstride: dbpl); |
313 | } |
314 | |
315 | void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
316 | { |
317 | qt_memrotate90(src: (const uint *)srcPixels, w, h, sstride: sbpl, dest: (uint *)destPixels, dstride: dbpl); |
318 | } |
319 | |
320 | void qt_memrotate180_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
321 | { |
322 | qt_memrotate180(src: (const uint *)srcPixels, w, h, sstride: sbpl, dest: (uint *)destPixels, dstride: dbpl); |
323 | } |
324 | |
325 | void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
326 | { |
327 | qt_memrotate270(src: (const uint *)srcPixels, w, h, sstride: sbpl, dest: (uint *)destPixels, dstride: dbpl); |
328 | } |
329 | |
330 | |
331 | void qt_memrotate90_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
332 | { |
333 | qt_memrotate90(src: (const quint64 *)srcPixels, w, h, sstride: sbpl, dest: (quint64 *)destPixels, dstride: dbpl); |
334 | } |
335 | |
336 | void qt_memrotate180_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
337 | { |
338 | qt_memrotate180(src: (const quint64 *)srcPixels, w, h, sstride: sbpl, dest: (quint64 *)destPixels, dstride: dbpl); |
339 | } |
340 | |
341 | void qt_memrotate270_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
342 | { |
343 | qt_memrotate270(src: (const quint64 *)srcPixels, w, h, sstride: sbpl, dest: (quint64 *)destPixels, dstride: dbpl); |
344 | } |
345 | |
346 | void qt_memrotate90_128(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
347 | { |
348 | qt_memrotate90(src: (const QRgbaFloat32 *)srcPixels, w, h, sstride: sbpl, dest: (QRgbaFloat32 *)destPixels, dstride: dbpl); |
349 | } |
350 | |
351 | void qt_memrotate180_128(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
352 | { |
353 | qt_memrotate180(src: (const QRgbaFloat32 *)srcPixels, w, h, sstride: sbpl, dest: (QRgbaFloat32 *)destPixels, dstride: dbpl); |
354 | } |
355 | |
356 | void qt_memrotate270_128(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) |
357 | { |
358 | qt_memrotate270(src: (const QRgbaFloat32 *)srcPixels, w, h, sstride: sbpl, dest: (QRgbaFloat32 *)destPixels, dstride: dbpl); |
359 | } |
360 | |
361 | MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] = |
362 | // 90, 180, 270 |
363 | { |
364 | { nullptr, nullptr, nullptr }, // BPPNone, |
365 | { nullptr, nullptr, nullptr }, // BPP1MSB, |
366 | { nullptr, nullptr, nullptr }, // BPP1LSB, |
367 | { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // BPP8, |
368 | { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // BPP16, |
369 | { qt_memrotate90_24, qt_memrotate180_24, qt_memrotate270_24 }, // BPP24 |
370 | { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // BPP32 |
371 | { qt_memrotate90_64, qt_memrotate180_64, qt_memrotate270_64 }, // BPP64 |
372 | { qt_memrotate90_64, qt_memrotate180_64, qt_memrotate270_64 }, // BPP16FPx4 |
373 | { qt_memrotate90_128, qt_memrotate180_128, qt_memrotate270_128 }, // BPP32FPx4 |
374 | }; |
375 | |
376 | QT_END_NAMESPACE |
377 | |