1 | #![allow (non_camel_case_types)] |
2 | |
3 | use crate::soft::{x2, x4}; |
4 | use crate::types::*; |
5 | use core::ops::*; |
6 | |
/// 128 bits of storage, viewable either as four 32-bit lanes (`d`) or as
/// two 64-bit lanes (`q`). Both fields alias the same 16 bytes, so reading
/// one view after writing the other just reinterprets the raw bits — always
/// defined for these plain-old-data arrays.
#[repr (C)]
#[derive (Clone, Copy)]
pub union vec128_storage {
    // Four 32-bit lanes.
    d: [u32; 4],
    // Two 64-bit lanes (same bytes as `d`).
    q: [u64; 2],
}
13 | impl From<[u32; 4]> for vec128_storage { |
14 | #[inline (always)] |
15 | fn from(d: [u32; 4]) -> Self { |
16 | Self { d } |
17 | } |
18 | } |
19 | impl From<vec128_storage> for [u32; 4] { |
20 | #[inline (always)] |
21 | fn from(d: vec128_storage) -> Self { |
22 | unsafe { d.d } |
23 | } |
24 | } |
25 | impl From<[u64; 2]> for vec128_storage { |
26 | #[inline (always)] |
27 | fn from(q: [u64; 2]) -> Self { |
28 | Self { q } |
29 | } |
30 | } |
31 | impl From<vec128_storage> for [u64; 2] { |
32 | #[inline (always)] |
33 | fn from(q: vec128_storage) -> Self { |
34 | unsafe { q.q } |
35 | } |
36 | } |
37 | impl Default for vec128_storage { |
38 | #[inline (always)] |
39 | fn default() -> Self { |
40 | Self { q: [0, 0] } |
41 | } |
42 | } |
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
    #[inline (always)]
    fn eq(&self, rhs: &Self) -> bool {
        // Compare the raw 128 bits; which union view is used does not
        // matter since both alias the same bytes.
        unsafe { self.q == rhs.q }
    }
}
/// 256 bits of storage, modeled as two 128-bit halves.
#[derive (Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    v128: [vec128_storage; 2],
}
impl vec256_storage {
    /// Builds a 256-bit value from its two 128-bit halves.
    #[inline (always)]
    pub fn new128(v128: [vec128_storage; 2]) -> Self {
        Self { v128 }
    }
    /// Splits back into the two 128-bit halves.
    #[inline (always)]
    pub fn split128(self) -> [vec128_storage; 2] {
        self.v128
    }
}
64 | impl From<vec256_storage> for [u64; 4] { |
65 | #[inline (always)] |
66 | fn from(q: vec256_storage) -> Self { |
67 | let [a: u64, b: u64]: [u64; 2] = q.v128[0].into(); |
68 | let [c: u64, d: u64]: [u64; 2] = q.v128[1].into(); |
69 | [a, b, c, d] |
70 | } |
71 | } |
72 | impl From<[u64; 4]> for vec256_storage { |
73 | #[inline (always)] |
74 | fn from([a: u64, b: u64, c: u64, d: u64]: [u64; 4]) -> Self { |
75 | Self { |
76 | v128: [[a, b].into(), [c, d].into()], |
77 | } |
78 | } |
79 | } |
/// 512 bits of storage, modeled as four 128-bit quarters.
#[derive (Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    v128: [vec128_storage; 4],
}
impl vec512_storage {
    /// Builds a 512-bit value from its four 128-bit quarters.
    #[inline (always)]
    pub fn new128(v128: [vec128_storage; 4]) -> Self {
        Self { v128 }
    }
    /// Splits back into the four 128-bit quarters.
    #[inline (always)]
    pub fn split128(self) -> [vec128_storage; 4] {
        self.v128
    }
}
94 | |
95 | #[inline (always)] |
96 | fn dmap<T, F>(t: T, f: F) -> T |
97 | where |
98 | T: Store<vec128_storage> + Into<vec128_storage>, |
99 | F: Fn(u32) -> u32, |
100 | { |
101 | let t: vec128_storage = t.into(); |
102 | let d: [u32; 4] = unsafe { t.d }; |
103 | let d: vec128_storage = vec128_storage { |
104 | d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])], |
105 | }; |
106 | unsafe { T::unpack(d) } |
107 | } |
108 | |
109 | fn dmap2<T, F>(a: T, b: T, f: F) -> T |
110 | where |
111 | T: Store<vec128_storage> + Into<vec128_storage>, |
112 | F: Fn(u32, u32) -> u32, |
113 | { |
114 | let a: vec128_storage = a.into(); |
115 | let b: vec128_storage = b.into(); |
116 | let ao: [u32; 4] = unsafe { a.d }; |
117 | let bo: [u32; 4] = unsafe { b.d }; |
118 | let d: vec128_storage = vec128_storage { |
119 | d: [ |
120 | f(ao[0], bo[0]), |
121 | f(ao[1], bo[1]), |
122 | f(ao[2], bo[2]), |
123 | f(ao[3], bo[3]), |
124 | ], |
125 | }; |
126 | unsafe { T::unpack(d) } |
127 | } |
128 | |
129 | #[inline (always)] |
130 | fn qmap<T, F>(t: T, f: F) -> T |
131 | where |
132 | T: Store<vec128_storage> + Into<vec128_storage>, |
133 | F: Fn(u64) -> u64, |
134 | { |
135 | let t: vec128_storage = t.into(); |
136 | let q: [u64; 2] = unsafe { t.q }; |
137 | let q: vec128_storage = vec128_storage { |
138 | q: [f(q[0]), f(q[1])], |
139 | }; |
140 | unsafe { T::unpack(q) } |
141 | } |
142 | |
143 | #[inline (always)] |
144 | fn qmap2<T, F>(a: T, b: T, f: F) -> T |
145 | where |
146 | T: Store<vec128_storage> + Into<vec128_storage>, |
147 | F: Fn(u64, u64) -> u64, |
148 | { |
149 | let a: vec128_storage = a.into(); |
150 | let b: vec128_storage = b.into(); |
151 | let ao: [u64; 2] = unsafe { a.q }; |
152 | let bo: [u64; 2] = unsafe { b.q }; |
153 | let q: vec128_storage = vec128_storage { |
154 | q: [f(ao[0], bo[0]), f(ao[1], bo[1])], |
155 | }; |
156 | unsafe { T::unpack(q) } |
157 | } |
158 | |
/// Widens two 64-bit halves into one u128; `q[0]` is the low half.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let lo = u128::from(q[0]);
    let hi = u128::from(q[1]) << 64;
    hi | lo
}
163 | |
/// Splits a u128 into its 64-bit halves, low half first.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    let lo = o as u64;
    let hi = (o >> 64) as u64;
    [lo, hi]
}
168 | |
169 | #[inline (always)] |
170 | fn omap<T, F>(a: T, f: F) -> T |
171 | where |
172 | T: Store<vec128_storage> + Into<vec128_storage>, |
173 | F: Fn(u128) -> u128, |
174 | { |
175 | let a: vec128_storage = a.into(); |
176 | let ao: u128 = o_of_q(unsafe { a.q }); |
177 | let o: vec128_storage = vec128_storage { q: q_of_o(f(ao)) }; |
178 | unsafe { T::unpack(o) } |
179 | } |
180 | |
181 | #[inline (always)] |
182 | fn omap2<T, F>(a: T, b: T, f: F) -> T |
183 | where |
184 | T: Store<vec128_storage> + Into<vec128_storage>, |
185 | F: Fn(u128, u128) -> u128, |
186 | { |
187 | let a: vec128_storage = a.into(); |
188 | let b: vec128_storage = b.into(); |
189 | let ao: u128 = o_of_q(unsafe { a.q }); |
190 | let bo: u128 = o_of_q(unsafe { b.q }); |
191 | let o: vec128_storage = vec128_storage { |
192 | q: q_of_o(f(ao, bo)), |
193 | }; |
194 | unsafe { T::unpack(o) } |
195 | } |
196 | |
// Marker-trait impls: opt each generic vector type into the bit-operation
// capability levels it supports. The operator bodies themselves come from
// the `impl_bitops!` macro below.
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
207 | |
// Implements the standard bitwise operators, their assignment forms, and
// the `Swap64` bit/byte permutations for one generic vector type. All
// operations are routed through the scalar helpers: `omap`/`omap2` treat
// the vector as one u128, `qmap` as two u64 lanes, `dmap` as four u32
// lanes — the results are bitwise identical for these lane-oblivious ops.
macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            // Note the operand order: `self` is complemented, `rhs` is not.
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        // swapN exchanges adjacent N-bit groups, implemented with the
        // classic mask-shift-or idiom (or a rotation where the group size
        // lines up with a lane boundary).
        impl Swap64 for $vec {
            #[inline(always)]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            #[inline(always)]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            #[inline(always)]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            #[inline(always)]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            #[inline(always)]
            fn swap16(self) -> Self {
                // Rotating each 32-bit word by 16 swaps its two halves.
                dmap(self, |x| x.rotate_left(16))
            }
            #[inline(always)]
            fn swap32(self) -> Self {
                // Rotating each 64-bit word by 32 swaps its two halves.
                qmap(self, |x| x.rotate_left(32))
            }
            #[inline(always)]
            fn swap64(self) -> Self {
                // Rotating the 128-bit value by 64 swaps its two halves.
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
// Instantiate the bitwise/swap operations for each 128-bit vector type.
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
307 | |
// Fixed-amount right-rotations applied independently to each 32-bit lane.
// (These specific amounts are the ones stream ciphers like ChaCha need.)
impl RotateEachWord32 for u32x4_generic {
    #[inline (always)]
    fn rotate_each_word_right7(self) -> Self {
        dmap(self, |x| x.rotate_right(7))
    }
    #[inline (always)]
    fn rotate_each_word_right8(self) -> Self {
        dmap(self, |x| x.rotate_right(8))
    }
    #[inline (always)]
    fn rotate_each_word_right11(self) -> Self {
        dmap(self, |x| x.rotate_right(11))
    }
    #[inline (always)]
    fn rotate_each_word_right12(self) -> Self {
        dmap(self, |x| x.rotate_right(12))
    }
    #[inline (always)]
    fn rotate_each_word_right16(self) -> Self {
        dmap(self, |x| x.rotate_right(16))
    }
    #[inline (always)]
    fn rotate_each_word_right20(self) -> Self {
        dmap(self, |x| x.rotate_right(20))
    }
    #[inline (always)]
    fn rotate_each_word_right24(self) -> Self {
        dmap(self, |x| x.rotate_right(24))
    }
    #[inline (always)]
    fn rotate_each_word_right25(self) -> Self {
        dmap(self, |x| x.rotate_right(25))
    }
}
342 | |
// The same fixed rotation amounts, applied to each full 64-bit lane.
impl RotateEachWord32 for u64x2_generic {
    #[inline (always)]
    fn rotate_each_word_right7(self) -> Self {
        qmap(self, |x| x.rotate_right(7))
    }
    #[inline (always)]
    fn rotate_each_word_right8(self) -> Self {
        qmap(self, |x| x.rotate_right(8))
    }
    #[inline (always)]
    fn rotate_each_word_right11(self) -> Self {
        qmap(self, |x| x.rotate_right(11))
    }
    #[inline (always)]
    fn rotate_each_word_right12(self) -> Self {
        qmap(self, |x| x.rotate_right(12))
    }
    #[inline (always)]
    fn rotate_each_word_right16(self) -> Self {
        qmap(self, |x| x.rotate_right(16))
    }
    #[inline (always)]
    fn rotate_each_word_right20(self) -> Self {
        qmap(self, |x| x.rotate_right(20))
    }
    #[inline (always)]
    fn rotate_each_word_right24(self) -> Self {
        qmap(self, |x| x.rotate_right(24))
    }
    #[inline (always)]
    fn rotate_each_word_right25(self) -> Self {
        qmap(self, |x| x.rotate_right(25))
    }
}
377 | impl RotateEachWord64 for u64x2_generic { |
378 | #[inline (always)] |
379 | fn rotate_each_word_right32(self) -> Self { |
380 | qmap(self, |x: u64| x.rotate_right(32)) |
381 | } |
382 | } |
383 | |
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotates `x` right by `i` bits without using `u128::rotate_right`.
///
/// NOTE(review): requires `0 < i < 128`; `i == 0` would shift by 128,
/// which panics in debug builds. All callers in this file pass fixed
/// amounts in that range.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    (x << (128 - i)) | (x >> i)
}
// Sanity-check the hand-rolled rotation against the standard library.
#[test ]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
}
394 | |
// The same fixed rotation amounts, applied to the single 128-bit lane
// using the cargo-web-safe helper above.
impl RotateEachWord32 for u128x1_generic {
    #[inline (always)]
    fn rotate_each_word_right7(self) -> Self {
        Self([rotate_u128_right(self.0[0], 7)])
    }
    #[inline (always)]
    fn rotate_each_word_right8(self) -> Self {
        Self([rotate_u128_right(self.0[0], 8)])
    }
    #[inline (always)]
    fn rotate_each_word_right11(self) -> Self {
        Self([rotate_u128_right(self.0[0], 11)])
    }
    #[inline (always)]
    fn rotate_each_word_right12(self) -> Self {
        Self([rotate_u128_right(self.0[0], 12)])
    }
    #[inline (always)]
    fn rotate_each_word_right16(self) -> Self {
        Self([rotate_u128_right(self.0[0], 16)])
    }
    #[inline (always)]
    fn rotate_each_word_right20(self) -> Self {
        Self([rotate_u128_right(self.0[0], 20)])
    }
    #[inline (always)]
    fn rotate_each_word_right24(self) -> Self {
        Self([rotate_u128_right(self.0[0], 24)])
    }
    #[inline (always)]
    fn rotate_each_word_right25(self) -> Self {
        Self([rotate_u128_right(self.0[0], 25)])
    }
}
429 | impl RotateEachWord64 for u128x1_generic { |
430 | #[inline (always)] |
431 | fn rotate_each_word_right32(self) -> Self { |
432 | Self([rotate_u128_right(self.0[0], i:32)]) |
433 | } |
434 | } |
435 | |
/// Pure-software fallback "machine": every vector type is backed by the
/// scalar `*_generic` implementations in this file.
#[derive (Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    #[inline (always)]
    unsafe fn instance() -> Self {
        // No CPU-feature check is needed for the scalar fallback.
        Self
    }
}
454 | |
// Scalar-backed vector types: plain arrays standing in for SIMD registers.
#[derive (Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
#[derive (Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
#[derive (Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
461 | |
// Packing each generic vector type into the shared 128-bit storage union.
impl From<u32x4_generic> for vec128_storage {
    #[inline (always)]
    fn from(d: u32x4_generic) -> Self {
        Self { d: d.0 }
    }
}
impl From<u64x2_generic> for vec128_storage {
    #[inline (always)]
    fn from(q: u64x2_generic) -> Self {
        Self { q: q.0 }
    }
}
impl From<u128x1_generic> for vec128_storage {
    #[inline (always)]
    fn from(o: u128x1_generic) -> Self {
        // The single u128 lane is stored as its two 64-bit halves.
        Self { q: q_of_o(o.0[0]) }
    }
}
480 | |
// Unpacking shared storage back into each generic vector type.
impl Store<vec128_storage> for u32x4_generic {
    #[inline (always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: both union views are plain arrays over the same 16 bytes.
        Self(s.d)
    }
}
impl Store<vec128_storage> for u64x2_generic {
    #[inline (always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: as above.
        Self(s.q)
    }
}
impl Store<vec128_storage> for u128x1_generic {
    #[inline (always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: as above; the two u64 halves are widened into one u128.
        Self([o_of_q(s.q); 1])
    }
}
499 | |
// Marker impls: the arithmetic capability is provided by the Add/AddAssign
// impls below.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
503 | |
504 | impl Add for u32x4_generic { |
505 | type Output = Self; |
506 | #[inline (always)] |
507 | fn add(self, rhs: Self) -> Self::Output { |
508 | dmap2(self, b:rhs, |x: u32, y: u32| x.wrapping_add(y)) |
509 | } |
510 | } |
511 | impl Add for u64x2_generic { |
512 | type Output = Self; |
513 | #[inline (always)] |
514 | fn add(self, rhs: Self) -> Self::Output { |
515 | qmap2(self, b:rhs, |x: u64, y: u64| x.wrapping_add(y)) |
516 | } |
517 | } |
518 | impl Add for u128x1_generic { |
519 | type Output = Self; |
520 | #[inline (always)] |
521 | fn add(self, rhs: Self) -> Self::Output { |
522 | omap2(self, b:rhs, |x: u128, y: u128| x.wrapping_add(y)) |
523 | } |
524 | } |
// Assignment forms delegate to the wrapping `Add` impls above.
impl AddAssign for u32x4_generic {
    #[inline (always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u64x2_generic {
    #[inline (always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u128x1_generic {
    #[inline (always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
// Byte-order reversal, applied within each lane.
impl BSwap for u32x4_generic {
    #[inline (always)]
    fn bswap(self) -> Self {
        dmap(self, |x: u32| x.swap_bytes())
    }
}
impl BSwap for u64x2_generic {
    #[inline (always)]
    fn bswap(self) -> Self {
        qmap(self, |x: u64| x.swap_bytes())
    }
}
impl BSwap for u128x1_generic {
    #[inline (always)]
    fn bswap(self) -> Self {
        omap(self, |x: u128| x.swap_bytes())
    }
}
impl StoreBytes for u32x4_generic {
    #[inline (always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: length is asserted to be exactly 16, so reading a
        // [u8; 16] from the slice start stays in bounds; the transmute
        // reinterprets those bytes as the vector's native lanes.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Normalize each lane from little-endian byte order.
        dmap(x, |x| x.to_le())
    }
    #[inline (always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: as above.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Normalize each lane from big-endian byte order.
        dmap(x, |x| x.to_be())
    }
    #[inline (always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_le());
        // SAFETY: length is asserted to be exactly 16, so writing a
        // [u8; 16] at the slice start stays in bounds.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline (always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_be());
        // SAFETY: as above.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
impl StoreBytes for u64x2_generic {
    #[inline (always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: length is asserted to be exactly 16, so reading a
        // [u8; 16] from the slice start stays in bounds; the transmute
        // reinterprets those bytes as the vector's native lanes.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Normalize each lane from little-endian byte order.
        qmap(x, |x| x.to_le())
    }
    #[inline (always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: as above.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Normalize each lane from big-endian byte order.
        qmap(x, |x| x.to_be())
    }
    #[inline (always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_le());
        // SAFETY: length is asserted to be exactly 16, so writing a
        // [u8; 16] at the slice start stays in bounds.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline (always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_be());
        // SAFETY: as above.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
613 | |
// Zero-sized "group" tags, used to give otherwise structurally identical
// x2 compositions distinct nominal types (e.g. u64x2x2 vs u64x4).
#[derive (Copy, Clone)]
pub struct G0;
#[derive (Copy, Clone)]
pub struct G1;
// Wider vectors are built by pairing (x2) or quadrupling (x4) the
// 128-bit scalar-backed vectors.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
625 | |
626 | impl Vector<[u32; 16]> for u32x4x4_generic { |
627 | fn to_scalars(self) -> [u32; 16] { |
628 | let [a: u32x4_generic, b: u32x4_generic, c: u32x4_generic, d: u32x4_generic] = self.0; |
629 | let a: [u32; 4] = a.0; |
630 | let b: [u32; 4] = b.0; |
631 | let c: [u32; 4] = c.0; |
632 | let d: [u32; 4] = d.0; |
633 | [ |
634 | a[0], a[1], a[2], a[3], // |
635 | b[0], b[1], b[2], b[3], // |
636 | c[0], c[1], c[2], c[3], // |
637 | d[0], d[1], d[2], d[3], // |
638 | ] |
639 | } |
640 | } |
641 | |
// Lane-array conversions are trivial for the scalar-backed types: the
// lanes ARE the backing array.
impl MultiLane<[u32; 4]> for u32x4_generic {
    #[inline (always)]
    fn to_lanes(self) -> [u32; 4] {
        self.0
    }
    #[inline (always)]
    fn from_lanes(xs: [u32; 4]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 2]> for u64x2_generic {
    #[inline (always)]
    fn to_lanes(self) -> [u64; 2] {
        self.0
    }
    #[inline (always)]
    fn from_lanes(xs: [u64; 2]) -> Self {
        Self(xs)
    }
}
662 | impl MultiLane<[u64; 4]> for u64x4_generic { |
663 | #[inline (always)] |
664 | fn to_lanes(self) -> [u64; 4] { |
665 | let (a: [u64; 2], b: [u64; 2]) = (self.0[0].to_lanes(), self.0[1].to_lanes()); |
666 | [a[0], a[1], b[0], b[1]] |
667 | } |
668 | #[inline (always)] |
669 | fn from_lanes(xs: [u64; 4]) -> Self { |
670 | let (a: u64x2_generic, b: u64x2_generic) = ( |
671 | u64x2_generic::from_lanes([xs[0], xs[1]]), |
672 | u64x2_generic::from_lanes([xs[2], xs[3]]), |
673 | ); |
674 | x2::new([a, b]) |
675 | } |
676 | } |
impl MultiLane<[u128; 1]> for u128x1_generic {
    // The single lane is the backing array itself.
    #[inline (always)]
    fn to_lanes(self) -> [u128; 1] {
        self.0
    }
    #[inline (always)]
    fn from_lanes(xs: [u128; 1]) -> Self {
        Self(xs)
    }
}
impl Vec4<u32> for u32x4_generic {
    #[inline (always)]
    fn extract(self, i: u32) -> u32 {
        // Panics if `i > 3` (array bounds check).
        self.0[i as usize]
    }
    #[inline (always)]
    fn insert(mut self, v: u32, i: u32) -> Self {
        // Returns a copy with lane `i` replaced; panics if `i > 3`.
        self.0[i as usize] = v;
        self
    }
}
698 | impl Vec4<u64> for u64x4_generic { |
699 | #[inline (always)] |
700 | fn extract(self, i: u32) -> u64 { |
701 | let d: [u64; 4] = self.to_lanes(); |
702 | d[i as usize] |
703 | } |
704 | #[inline (always)] |
705 | fn insert(self, v: u64, i: u32) -> Self { |
706 | self.0[(i / 2) as usize].insert(w:v, i:i % 2); |
707 | self |
708 | } |
709 | } |
impl Vec2<u64> for u64x2_generic {
    #[inline (always)]
    fn extract(self, i: u32) -> u64 {
        // Panics if `i > 1` (array bounds check).
        self.0[i as usize]
    }
    #[inline (always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        // Returns a copy with lane `i` replaced; panics if `i > 1`.
        self.0[i as usize] = v;
        self
    }
}
721 | |
// Word shuffles; the digits in each name give the source lane for each
// output position, from high to low.
impl Words4 for u32x4_generic {
    #[inline (always)]
    fn shuffle2301(self) -> Self {
        // Swapping adjacent 32-bit pairs is exactly a 64-bit-group swap.
        self.swap64()
    }
    #[inline (always)]
    fn shuffle1230(self) -> Self {
        let x: [u32; 4] = self.0;
        Self([x[3], x[0], x[1], x[2]])
    }
    #[inline (always)]
    fn shuffle3012(self) -> Self {
        let x: [u32; 4] = self.0;
        Self([x[1], x[2], x[3], x[0]])
    }
}
// With only one 128-bit lane, lane-local shuffles coincide with the full
// `Words4` shuffles above.
impl LaneWords4 for u32x4_generic {
    #[inline (always)]
    fn shuffle_lane_words2301(self) -> Self {
        self.shuffle2301()
    }
    #[inline (always)]
    fn shuffle_lane_words1230(self) -> Self {
        self.shuffle1230()
    }
    #[inline (always)]
    fn shuffle_lane_words3012(self) -> Self {
        self.shuffle3012()
    }
}
752 | |
impl Words4 for u64x4_generic {
    #[inline (always)]
    fn shuffle2301(self) -> Self {
        // Swap the two 128-bit halves (i.e. the two u64 pairs).
        x2::new([self.0[1], self.0[0]])
    }
    // The rotating shuffles are not provided for this type; calling them
    // panics via `unimplemented!` (no current caller needs them).
    #[inline (always)]
    fn shuffle1230(self) -> Self {
        unimplemented!()
    }
    #[inline (always)]
    fn shuffle3012(self) -> Self {
        unimplemented!()
    }
}
767 | |
// Tie each generic vector type to `GenericMachine` as the implementation
// of the corresponding width trait.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
778 | |
// Wraps a function body so that it runs against the selected "machine".
// In this generic-only build the only backend is `GenericMachine`, so the
// wrapper instantiates it and calls a monomorphized inner `fn_impl`.
#[macro_export ]
macro_rules! dispatch {
    // Arm with an explicit return type.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    // Arm without a return type: forwards to the arm above with `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
// Identical expansion to `dispatch!` here: the generic machine has no
// lighter 128-bit path. Presumably kept as a separate macro so SIMD
// backends can specialize it — confirm against the non-generic modules.
#[macro_export ]
macro_rules! dispatch_light128 {
    // Arm with an explicit return type.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    // Arm without a return type: forwards to `dispatch!` with `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
// Identical expansion to `dispatch!` here (see `dispatch_light128!` note).
#[macro_export ]
macro_rules! dispatch_light256 {
    // Arm with an explicit return type.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    // Arm without a return type: forwards to `dispatch!` with `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
// Identical expansion to `dispatch!` here (see `dispatch_light128!` note).
#[macro_export ]
macro_rules! dispatch_light512 {
    // Arm with an explicit return type.
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    // Arm without a return type: forwards to `dispatch!` with `-> ()`.
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
847 | |
#[cfg (test)]
mod test {
    use super::*;

    // bswap should reverse the byte order within each 32-bit lane.
    #[test ]
    fn test_bswap32() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.bswap();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }
}
866 | |