soft.rs source code [crates/ppv_lite86/src/soft.rs]

//! Implements 256- and 512-bit vectors in terms of 128-bit vectors, for machines without native wide SIMD.
2
3	use crate::types::*;
4	use crate::{vec128_storage, vec256_storage, vec512_storage};
5	use core::marker::PhantomData;
6	use core::ops::*;
7	use zerocopy::{AsBytes, FromBytes, FromZeroes};
8
9	#[derive(Copy, Clone, Default, FromBytes, AsBytes, FromZeroes)]
10	#[repr(transparent)]
11	#[allow(non_camel_case_types)]
12	pub struct x2<W, G>(pub [W; `2`], PhantomData<G>);
13	impl<W, G> x2<W, G> {
14	#[inline(always)]
15	pub fn new(xs: [W; `2`]) -> Self {
16	x2(xs, PhantomData)
17	}
18	}
/// Implements the binary operator trait `$trait` (whose method is `$fn`) for
/// `x2` by applying the operator to each pair of corresponding lanes.
///
/// The output is an `x2` of the lane type's own `Output`.
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
            }
        }
    };
}
/// Implements the compound-assignment trait `$trait` (whose method is
/// `$fn_assign`) for `x2` by updating each lane in place with the matching
/// lane of `rhs`.
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
            }
        }
    };
}
/// Expands to a method `$fn(self) -> Self` that calls `$fn` on each of the two
/// lanes and rebuilds an `x2` from the results.
///
/// Intended for use inside an `impl ... for x2<W, G>` block.
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x2::new([self.0[0].$fn(), self.0[1].$fn()])
        }
    };
}
49	impl<W, G> RotateEachWord32 for x2<W, G>
50	where
51	W: Copy + RotateEachWord32,
52	{
53	fwd_unop_x2!(rotate_each_word_right7);
54	fwd_unop_x2!(rotate_each_word_right8);
55	fwd_unop_x2!(rotate_each_word_right11);
56	fwd_unop_x2!(rotate_each_word_right12);
57	fwd_unop_x2!(rotate_each_word_right16);
58	fwd_unop_x2!(rotate_each_word_right20);
59	fwd_unop_x2!(rotate_each_word_right24);
60	fwd_unop_x2!(rotate_each_word_right25);
61	}
62	impl<W, G> RotateEachWord64 for x2<W, G>
63	where
64	W: Copy + RotateEachWord64,
65	{
66	fwd_unop_x2!(rotate_each_word_right32);
67	}
68	impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
69	impl<W, G> BitOps0 for x2<W, G>
70	where
71	W: BitOps0,
72	G: Copy,
73	{
74	}
75	impl<W, G> BitOps32 for x2<W, G>
76	where
77	W: BitOps32 + BitOps0,
78	G: Copy,
79	{
80	}
81	impl<W, G> BitOps64 for x2<W, G>
82	where
83	W: BitOps64 + BitOps0,
84	G: Copy,
85	{
86	}
87	impl<W, G> BitOps128 for x2<W, G>
88	where
89	W: BitOps128 + BitOps0,
90	G: Copy,
91	{
92	}
93	fwd_binop_x2!(BitAnd, bitand);
94	fwd_binop_x2!(BitOr, bitor);
95	fwd_binop_x2!(BitXor, bitxor);
96	fwd_binop_x2!(AndNot, andnot);
97	fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
98	fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
99	fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
100	impl<W, G> ArithOps for x2<W, G>
101	where
102	W: ArithOps,
103	G: Copy,
104	{
105	}
106	fwd_binop_x2!(Add, add);
107	fwd_binop_assign_x2!(AddAssign, add_assign);
108	impl<W: Not + Copy, G> Not for x2<W, G> {
109	type Output = x2<W::Output, G>;
110	#[inline(always)]
111	fn not(self) -> Self::Output {
112	x2::new([self.0[`0`].not(), self.0[`1`].not()])
113	}
114	}
115	impl<W, G> UnsafeFrom<[W; `2`]> for x2<W, G> {
116	#[inline(always)]
117	unsafe fn unsafe_from(xs: [W; `2`]) -> Self {
118	x2::new(xs)
119	}
120	}
121	impl<W: Copy, G> Vec2<W> for x2<W, G> {
122	#[inline(always)]
123	fn extract(self, i: u32) -> W {
124	self.0[i as usize]
125	}
126	#[inline(always)]
127	fn insert(mut self, w: W, i: u32) -> Self {
128	self.0[i as usize] = w;
129	self
130	}
131	}
132	impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
133	#[inline(always)]
134	unsafe fn unpack(p: vec256_storage) -> Self {
135	let p: [vec128_storage; 2] = p.split128();
136	x2::new([W::unpack(p[`0`]), W::unpack(p[`1`])])
137	}
138	}
139	impl<W, G> From<x2<W, G>> for vec256_storage
140	where
141	W: Copy,
142	vec128_storage: From<W>,
143	{
144	#[inline(always)]
145	fn from(x: x2<W, G>) -> Self {
146	vec256_storage::new128([x.0[`0`].into(), x.0[`1`].into()])
147	}
148	}
149	impl<W, G> Swap64 for x2<W, G>
150	where
151	W: Swap64 + Copy,
152	{
153	fwd_unop_x2!(swap1);
154	fwd_unop_x2!(swap2);
155	fwd_unop_x2!(swap4);
156	fwd_unop_x2!(swap8);
157	fwd_unop_x2!(swap16);
158	fwd_unop_x2!(swap32);
159	fwd_unop_x2!(swap64);
160	}
161	impl<W: Copy, G> MultiLane<[W; `2`]> for x2<W, G> {
162	#[inline(always)]
163	fn to_lanes(self) -> [W; `2`] {
164	self.0
165	}
166	#[inline(always)]
167	fn from_lanes(lanes: [W; `2`]) -> Self {
168	x2::new(xs:lanes)
169	}
170	}
171	impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
172	#[inline(always)]
173	fn bswap(self) -> Self {
174	x2::new([self.0[`0`].bswap(), self.0[`1`].bswap()])
175	}
176	}
177	impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
178	#[inline(always)]
179	unsafe fn unsafe_read_le(input: &[u8]) -> Self {
180	let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / `2`);
181	x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
182	}
183	#[inline(always)]
184	unsafe fn unsafe_read_be(input: &[u8]) -> Self {
185	let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / `2`);
186	x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
187	}
188	#[inline(always)]
189	fn write_le(self, out: &mut [u8]) {
190	let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / `2`);
191	self.0[`0`].write_le(out.0);
192	self.0[`1`].write_le(out.1);
193	}
194	#[inline(always)]
195	fn write_be(self, out: &mut [u8]) {
196	let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / `2`);
197	self.0[`0`].write_be(out.0);
198	self.0[`1`].write_be(out.1);
199	}
200	}
201	impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
202	#[inline(always)]
203	fn shuffle_lane_words2301(self) -> Self {
204	Self::new([
205	self.0[`0`].shuffle_lane_words2301(),
206	self.0[`1`].shuffle_lane_words2301(),
207	])
208	}
209	#[inline(always)]
210	fn shuffle_lane_words1230(self) -> Self {
211	Self::new([
212	self.0[`0`].shuffle_lane_words1230(),
213	self.0[`1`].shuffle_lane_words1230(),
214	])
215	}
216	#[inline(always)]
217	fn shuffle_lane_words3012(self) -> Self {
218	Self::new([
219	self.0[`0`].shuffle_lane_words3012(),
220	self.0[`1`].shuffle_lane_words3012(),
221	])
222	}
223	}
224
225	#[derive(Copy, Clone, Default, FromBytes, AsBytes, FromZeroes)]
226	#[repr(transparent)]
227	#[allow(non_camel_case_types)]
228	pub struct x4<W>(pub [W; `4`]);
229	impl<W> x4<W> {
230	#[inline(always)]
231	pub fn new(xs: [W; `4`]) -> Self {
232	x4(xs)
233	}
234	}
/// Implements the binary operator trait `$trait` (whose method is `$fn`) for
/// `x4` by applying the operator to each of the four pairs of corresponding
/// lanes.
///
/// The output is an `x4` of the lane type's own `Output`.
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x4([
                    self.0[0].$fn(rhs.0[0]),
                    self.0[1].$fn(rhs.0[1]),
                    self.0[2].$fn(rhs.0[2]),
                    self.0[3].$fn(rhs.0[3]),
                ])
            }
        }
    };
}
/// Implements the compound-assignment trait `$trait` (whose method is
/// `$fn_assign`) for `x4` by updating each of the four lanes in place with
/// the matching lane of `rhs`.
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
                self.0[2].$fn_assign(rhs.0[2]);
                self.0[3].$fn_assign(rhs.0[3]);
            }
        }
    };
}
/// Expands to a method `$fn(self) -> Self` that calls `$fn` on each of the
/// four lanes and rebuilds an `x4` from the results.
///
/// Intended for use inside an `impl ... for x4<W>` block.
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
}
277	impl<W> RotateEachWord32 for x4<W>
278	where
279	W: Copy + RotateEachWord32,
280	{
281	fwd_unop_x4!(rotate_each_word_right7);
282	fwd_unop_x4!(rotate_each_word_right8);
283	fwd_unop_x4!(rotate_each_word_right11);
284	fwd_unop_x4!(rotate_each_word_right12);
285	fwd_unop_x4!(rotate_each_word_right16);
286	fwd_unop_x4!(rotate_each_word_right20);
287	fwd_unop_x4!(rotate_each_word_right24);
288	fwd_unop_x4!(rotate_each_word_right25);
289	}
290	impl<W> RotateEachWord64 for x4<W>
291	where
292	W: Copy + RotateEachWord64,
293	{
294	fwd_unop_x4!(rotate_each_word_right32);
295	}
296	impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
297	impl<W> BitOps0 for x4<W> where W: BitOps0 {}
298	impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
299	impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
300	impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
301	fwd_binop_x4!(BitAnd, bitand);
302	fwd_binop_x4!(BitOr, bitor);
303	fwd_binop_x4!(BitXor, bitxor);
304	fwd_binop_x4!(AndNot, andnot);
305	fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
306	fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
307	fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
308	impl<W> ArithOps for x4<W> where W: ArithOps {}
309	fwd_binop_x4!(Add, add);
310	fwd_binop_assign_x4!(AddAssign, add_assign);
311	impl<W: Not + Copy> Not for x4<W> {
312	type Output = x4<W::Output>;
313	#[inline(always)]
314	fn not(self) -> Self::Output {
315	x4([
316	self.0[`0`].not(),
317	self.0[`1`].not(),
318	self.0[`2`].not(),
319	self.0[`3`].not(),
320	])
321	}
322	}
323	impl<W> UnsafeFrom<[W; `4`]> for x4<W> {
324	#[inline(always)]
325	unsafe fn unsafe_from(xs: [W; `4`]) -> Self {
326	x4(xs)
327	}
328	}
329	impl<W: Copy> Vec4<W> for x4<W> {
330	#[inline(always)]
331	fn extract(self, i: u32) -> W {
332	self.0[i as usize]
333	}
334	#[inline(always)]
335	fn insert(mut self, w: W, i: u32) -> Self {
336	self.0[i as usize] = w;
337	self
338	}
339	}
340	impl<W: Copy> Vec4Ext<W> for x4<W> {
341	#[inline(always)]
342	fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
343	where
344	Self: Sized,
345	{
346	(
347	x4([a.0[`0`], b.0[`0`], c.0[`0`], d.0[`0`]]),
348	x4([a.0[`1`], b.0[`1`], c.0[`1`], d.0[`1`]]),
349	x4([a.0[`2`], b.0[`2`], c.0[`2`], d.0[`2`]]),
350	x4([a.0[`3`], b.0[`3`], c.0[`3`], d.0[`3`]]),
351	)
352	}
353	}
354	impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
355	#[inline(always)]
356	unsafe fn unpack(p: vec512_storage) -> Self {
357	let p: [vec128_storage; 4] = p.split128();
358	x4([
359	W::unpack(p[`0`]),
360	W::unpack(p[`1`]),
361	W::unpack(p[`2`]),
362	W::unpack(p[`3`]),
363	])
364	}
365	}
366	impl<W> From<x4<W>> for vec512_storage
367	where
368	W: Copy,
369	vec128_storage: From<W>,
370	{
371	#[inline(always)]
372	fn from(x: x4<W>) -> Self {
373	vec512_storage::new128([x.0[`0`].into(), x.0[`1`].into(), x.0[`2`].into(), x.0[`3`].into()])
374	}
375	}
376	impl<W> Swap64 for x4<W>
377	where
378	W: Swap64 + Copy,
379	{
380	fwd_unop_x4!(swap1);
381	fwd_unop_x4!(swap2);
382	fwd_unop_x4!(swap4);
383	fwd_unop_x4!(swap8);
384	fwd_unop_x4!(swap16);
385	fwd_unop_x4!(swap32);
386	fwd_unop_x4!(swap64);
387	}
388	impl<W: Copy> MultiLane<[W; `4`]> for x4<W> {
389	#[inline(always)]
390	fn to_lanes(self) -> [W; `4`] {
391	self.0
392	}
393	#[inline(always)]
394	fn from_lanes(lanes: [W; `4`]) -> Self {
395	x4(lanes)
396	}
397	}
398	impl<W: BSwap + Copy> BSwap for x4<W> {
399	#[inline(always)]
400	fn bswap(self) -> Self {
401	x4([
402	self.0[`0`].bswap(),
403	self.0[`1`].bswap(),
404	self.0[`2`].bswap(),
405	self.0[`3`].bswap(),
406	])
407	}
408	}
409	impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
410	#[inline(always)]
411	unsafe fn unsafe_read_le(input: &[u8]) -> Self {
412	let n = input.len() / `4`;
413	x4([
414	W::unsafe_read_le(&input[..n]),
415	W::unsafe_read_le(&input[n..n * `2`]),
416	W::unsafe_read_le(&input[n * `2`..n * `3`]),
417	W::unsafe_read_le(&input[n * `3`..]),
418	])
419	}
420	#[inline(always)]
421	unsafe fn unsafe_read_be(input: &[u8]) -> Self {
422	let n = input.len() / `4`;
423	x4([
424	W::unsafe_read_be(&input[..n]),
425	W::unsafe_read_be(&input[n..n * `2`]),
426	W::unsafe_read_be(&input[n * `2`..n * `3`]),
427	W::unsafe_read_be(&input[n * `3`..]),
428	])
429	}
430	#[inline(always)]
431	fn write_le(self, out: &mut [u8]) {
432	let n = out.len() / `4`;
433	self.0[`0`].write_le(&mut out[..n]);
434	self.0[`1`].write_le(&mut out[n..n * `2`]);
435	self.0[`2`].write_le(&mut out[n * `2`..n * `3`]);
436	self.0[`3`].write_le(&mut out[n * `3`..]);
437	}
438	#[inline(always)]
439	fn write_be(self, out: &mut [u8]) {
440	let n = out.len() / `4`;
441	self.0[`0`].write_be(&mut out[..n]);
442	self.0[`1`].write_be(&mut out[n..n * `2`]);
443	self.0[`2`].write_be(&mut out[n * `2`..n * `3`]);
444	self.0[`3`].write_be(&mut out[n * `3`..]);
445	}
446	}
447	impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
448	#[inline(always)]
449	fn shuffle_lane_words2301(self) -> Self {
450	x4([
451	self.0[`0`].shuffle_lane_words2301(),
452	self.0[`1`].shuffle_lane_words2301(),
453	self.0[`2`].shuffle_lane_words2301(),
454	self.0[`3`].shuffle_lane_words2301(),
455	])
456	}
457	#[inline(always)]
458	fn shuffle_lane_words1230(self) -> Self {
459	x4([
460	self.0[`0`].shuffle_lane_words1230(),
461	self.0[`1`].shuffle_lane_words1230(),
462	self.0[`2`].shuffle_lane_words1230(),
463	self.0[`3`].shuffle_lane_words1230(),
464	])
465	}
466	#[inline(always)]
467	fn shuffle_lane_words3012(self) -> Self {
468	x4([
469	self.0[`0`].shuffle_lane_words3012(),
470	self.0[`1`].shuffle_lane_words3012(),
471	self.0[`2`].shuffle_lane_words3012(),
472	self.0[`3`].shuffle_lane_words3012(),
473	])
474	}
475	}
476