//! Implement 256- and 512-bit vectors in terms of 128-bit lanes, for machines without native wide SIMD.
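//!
//! The approach is purely structural: `x2` wraps two 128-bit words and `x4`
//! wraps four, and every operation is forwarded to each lane independently, so
//! the wide vector types behave like fixed-size arrays of narrower vectors
//! exposing the same trait surface.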

use crate::types::*;
use crate::{vec128_storage, vec256_storage, vec512_storage};
use core::marker::PhantomData;
use core::ops::*;

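/// A pair of 128-bit words treated as one 256-bit vector; every operation is
/// applied to both halves independently. The `G` parameter lives only in
/// `PhantomData`, so it has no runtime representation and serves purely as a
/// type-level tag.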
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
impl<W, G> x2<W, G> {
    #[inline(always)]
    pub fn new(xs: [W; 2]) -> Self {
        x2(xs, PhantomData)
    }
}
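// The three macros below generate the boilerplate impls for `x2`: binary ops,
// compound-assignment ops, and unary ops are each forwarded to the two lanes
// element by element.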
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
            }
        }
    };
}
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                (self.0[0]).$fn_assign(rhs.0[0]);
                (self.0[1]).$fn_assign(rhs.0[1]);
            }
        }
    };
}
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x2::new([self.0[0].$fn(), self.0[1].$fn()])
        }
    };
}
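// Per-word rotations never move bits across word boundaries, let alone across
// the two 128-bit lanes, so they can be forwarded to each lane unchanged.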
impl<W, G> RotateEachWord32 for x2<W, G>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x2!(rotate_each_word_right7);
    fwd_unop_x2!(rotate_each_word_right8);
    fwd_unop_x2!(rotate_each_word_right11);
    fwd_unop_x2!(rotate_each_word_right12);
    fwd_unop_x2!(rotate_each_word_right16);
    fwd_unop_x2!(rotate_each_word_right20);
    fwd_unop_x2!(rotate_each_word_right24);
    fwd_unop_x2!(rotate_each_word_right25);
}
impl<W, G> RotateEachWord64 for x2<W, G>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x2!(rotate_each_word_right32);
}
impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
impl<W, G> BitOps0 for x2<W, G>
where
    W: BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps32 for x2<W, G>
where
    W: BitOps32 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps64 for x2<W, G>
where
    W: BitOps64 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps128 for x2<W, G>
where
    W: BitOps128 + BitOps0,
    G: Copy,
{
}
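// Bitwise operators (including the crate's `AndNot`), their assignment forms,
// and addition are all plain lane-wise forwards via the macros above.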
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
impl<W, G> ArithOps for x2<W, G>
where
    W: ArithOps,
    G: Copy,
{
}
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
impl<W: Not + Copy, G> Not for x2<W, G> {
    type Output = x2<W::Output, G>;
    #[inline(always)]
    fn not(self) -> Self::Output {
        x2::new([self.0[0].not(), self.0[1].not()])
    }
}
impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
        x2::new(xs)
    }
}
impl<W: Copy, G> Vec2<W> for x2<W, G> {
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
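// Conversions to and from the opaque 256-bit storage type: `unpack` splits the
// storage into its two 128-bit halves and unpacks each lane, while the `From`
// impl below reassembles the halves with `vec256_storage::new128`.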
impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
    #[inline(always)]
    unsafe fn unpack(p: vec256_storage) -> Self {
        let p = p.split128();
        x2::new([W::unpack(p[0]), W::unpack(p[1])])
    }
}
impl<W, G> From<x2<W, G>> for vec256_storage
where
    W: Copy,
    vec128_storage: From<W>,
{
    #[inline(always)]
    fn from(x: x2<W, G>) -> Self {
        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
    }
}
impl<W, G> Swap64 for x2<W, G>
where
    W: Swap64 + Copy,
{
    fwd_unop_x2!(swap1);
    fwd_unop_x2!(swap2);
    fwd_unop_x2!(swap4);
    fwd_unop_x2!(swap8);
    fwd_unop_x2!(swap16);
    fwd_unop_x2!(swap32);
    fwd_unop_x2!(swap64);
}
impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
    #[inline(always)]
    fn to_lanes(self) -> [W; 2] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(lanes: [W; 2]) -> Self {
        x2::new(lanes)
    }
}
impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
    #[inline(always)]
    fn bswap(self) -> Self {
        x2::new([self.0[0].bswap(), self.0[1].bswap()])
    }
}
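// Byte-order-aware loads and stores split the byte slice into two equal halves
// and hand one half to each 128-bit lane's own reader or writer.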
impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let input = input.split_at(input.len() / 2);
        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let input = input.split_at(input.len() / 2);
        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let out = out.split_at_mut(out.len() / 2);
        self.0[0].write_le(out.0);
        self.0[1].write_le(out.1);
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let out = out.split_at_mut(out.len() / 2);
        self.0[0].write_be(out.0);
        self.0[1].write_be(out.1);
    }
}
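// Word shuffles also stay within a single 128-bit lane, so they forward to both
// halves unchanged.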
impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words2301(),
            self.0[1].shuffle_lane_words2301(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words1230(),
            self.0[1].shuffle_lane_words1230(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words3012(),
            self.0[1].shuffle_lane_words3012(),
        ])
    }
}

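/// Four 128-bit words treated as a single 512-bit vector. Unlike `x2` there is
/// no tag parameter; operations are forwarded to each of the four lanes in turn.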
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x4<W>(pub [W; 4]);
impl<W> x4<W> {
    #[inline(always)]
    pub fn new(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
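// Lane-wise forwarding macros for `x4`, mirroring the `x2` versions above but
// expanding over four lanes.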
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x4([
                    self.0[0].$fn(rhs.0[0]),
                    self.0[1].$fn(rhs.0[1]),
                    self.0[2].$fn(rhs.0[2]),
                    self.0[3].$fn(rhs.0[3]),
                ])
            }
        }
    };
}
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
                self.0[2].$fn_assign(rhs.0[2]);
                self.0[3].$fn_assign(rhs.0[3]);
            }
        }
    };
}
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
}
impl<W> RotateEachWord32 for x4<W>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x4!(rotate_each_word_right7);
    fwd_unop_x4!(rotate_each_word_right8);
    fwd_unop_x4!(rotate_each_word_right11);
    fwd_unop_x4!(rotate_each_word_right12);
    fwd_unop_x4!(rotate_each_word_right16);
    fwd_unop_x4!(rotate_each_word_right20);
    fwd_unop_x4!(rotate_each_word_right24);
    fwd_unop_x4!(rotate_each_word_right25);
}
impl<W> RotateEachWord64 for x4<W>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x4!(rotate_each_word_right32);
}
impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
impl<W> BitOps0 for x4<W> where W: BitOps0 {}
impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
impl<W> ArithOps for x4<W> where W: ArithOps {}
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
impl<W: Not + Copy> Not for x4<W> {
    type Output = x4<W::Output>;
    #[inline(always)]
    fn not(self) -> Self::Output {
        x4([
            self.0[0].not(),
            self.0[1].not(),
            self.0[2].not(),
            self.0[3].not(),
        ])
    }
}
impl<W> UnsafeFrom<[W; 4]> for x4<W> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
impl<W: Copy> Vec4<W> for x4<W> {
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
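// `transpose4` treats its four arguments as the rows of a 4x4 matrix of lanes
// and returns the columns: output i collects lane i of `a`, `b`, `c` and `d`.
// A concrete example appears in the sketch tests at the end of the file.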
impl<W: Copy> Vec4Ext<W> for x4<W> {
    #[inline(always)]
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized,
    {
        (
            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
        )
    }
}
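// As with `x2`, `unpack` splits the opaque 512-bit storage into four 128-bit
// parts, and the `From` impl below rebuilds it with `vec512_storage::new128`.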
impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
    #[inline(always)]
    unsafe fn unpack(p: vec512_storage) -> Self {
        let p = p.split128();
        x4([
            W::unpack(p[0]),
            W::unpack(p[1]),
            W::unpack(p[2]),
            W::unpack(p[3]),
        ])
    }
}
impl<W> From<x4<W>> for vec512_storage
where
    W: Copy,
    vec128_storage: From<W>,
{
    #[inline(always)]
    fn from(x: x4<W>) -> Self {
        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
    }
}
impl<W> Swap64 for x4<W>
where
    W: Swap64 + Copy,
{
    fwd_unop_x4!(swap1);
    fwd_unop_x4!(swap2);
    fwd_unop_x4!(swap4);
    fwd_unop_x4!(swap8);
    fwd_unop_x4!(swap16);
    fwd_unop_x4!(swap32);
    fwd_unop_x4!(swap64);
}
impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
    #[inline(always)]
    fn to_lanes(self) -> [W; 4] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(lanes: [W; 4]) -> Self {
        x4(lanes)
    }
}
impl<W: BSwap + Copy> BSwap for x4<W> {
    #[inline(always)]
    fn bswap(self) -> Self {
        x4([
            self.0[0].bswap(),
            self.0[1].bswap(),
            self.0[2].bswap(),
            self.0[3].bswap(),
        ])
    }
}
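// Byte-level I/O for `x4` splits the slice into four equal chunks, one per
// lane, in lane order.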
impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let n = input.len() / 4;
        x4([
            W::unsafe_read_le(&input[..n]),
            W::unsafe_read_le(&input[n..n * 2]),
            W::unsafe_read_le(&input[n * 2..n * 3]),
            W::unsafe_read_le(&input[n * 3..]),
        ])
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let n = input.len() / 4;
        x4([
            W::unsafe_read_be(&input[..n]),
            W::unsafe_read_be(&input[n..n * 2]),
            W::unsafe_read_be(&input[n * 2..n * 3]),
            W::unsafe_read_be(&input[n * 3..]),
        ])
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_le(&mut out[..n]);
        self.0[1].write_le(&mut out[n..n * 2]);
        self.0[2].write_le(&mut out[n * 2..n * 3]);
        self.0[3].write_le(&mut out[n * 3..]);
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_be(&mut out[..n]);
        self.0[1].write_be(&mut out[n..n * 2]);
        self.0[2].write_be(&mut out[n * 2..n * 3]);
        self.0[3].write_be(&mut out[n * 3..]);
    }
}
impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words2301(),
            self.0[1].shuffle_lane_words2301(),
            self.0[2].shuffle_lane_words2301(),
            self.0[3].shuffle_lane_words2301(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words1230(),
            self.0[1].shuffle_lane_words1230(),
            self.0[2].shuffle_lane_words1230(),
            self.0[3].shuffle_lane_words1230(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words3012(),
            self.0[1].shuffle_lane_words3012(),
            self.0[2].shuffle_lane_words3012(),
            self.0[3].shuffle_lane_words3012(),
        ])
    }
}
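
// A minimal sanity-check sketch of the lane-wise behaviour, using plain integer
// lanes in place of real 128-bit words. It assumes the `Vec2`, `Vec4Ext` and
// `MultiLane` traits from `crate::types` demand nothing beyond the generic
// impls defined above; adjust or drop it if that assumption does not hold.
#[cfg(test)]
mod soft_lane_sketch {
    use super::{x2, x4};
    use crate::types::{MultiLane, Vec2, Vec4Ext};

    #[test]
    fn x2_insert_extract_roundtrip() {
        // Two u32 "lanes" and a unit tag stand in for two 128-bit words.
        let v: x2<u32, ()> = x2::new([1, 2]);
        assert_eq!(v.extract(0), 1);
        let v = v.insert(9, 1);
        assert_eq!(v.to_lanes(), [1, 9]);
    }

    #[test]
    fn x4_transpose_is_a_4x4_lane_transpose() {
        // Four vectors are the rows; transpose4 returns the columns.
        let a = x4::new([0, 1, 2, 3]);
        let b = x4::new([4, 5, 6, 7]);
        let c = x4::new([8, 9, 10, 11]);
        let d = x4::new([12, 13, 14, 15]);
        let (ta, tb, tc, td) = Vec4Ext::transpose4(a, b, c, d);
        assert_eq!(ta.to_lanes(), [0, 4, 8, 12]);
        assert_eq!(tb.to_lanes(), [1, 5, 9, 13]);
        assert_eq!(tc.to_lanes(), [2, 6, 10, 14]);
        assert_eq!(td.to_lanes(), [3, 7, 11, 15]);
    }
}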