1#![allow(non_camel_case_types)]
2use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
3
/// Combined "and-not" bitwise operation on two values of the same type.
///
/// NOTE(review): which operand is complemented is not visible in this file; presumably this
/// follows the SIMD `andnot` convention (`!self & rhs`) — confirm against the implementations.
pub trait AndNot {
    /// Type produced by [`AndNot::andnot`].
    type Output;
    /// Applies the and-not operation to `self` and `rhs`, consuming both.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation.
///
/// NOTE(review): presumably reverses the byte order within each word of the value; the exact
/// granularity is decided by the implementations — confirm there.
pub trait BSwap {
    /// Consumes `self` and returns the byte-swapped value of the same type.
    fn bswap(self) -> Self;
}
11/// Ops that depend on word size
12pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
13/// Ops that are independent of word size and endian
14pub trait BitOps0:
15 BitAnd<Output = Self>
16 + BitOr<Output = Self>
17 + BitXor<Output = Self>
18 + BitXorAssign
19 + Not<Output = Self>
20 + AndNot<Output = Self>
21 + Sized
22 + Copy
23 + Clone
24{
25}
26
27pub trait BitOps32: BitOps0 + RotateEachWord32 {}
28pub trait BitOps64: BitOps32 + RotateEachWord64 {}
29pub trait BitOps128: BitOps64 + RotateEachWord128 {}
30
/// Fixed-amount rotations applied to a value word by word.
///
/// Each method consumes `self` and returns a value of the same type. Judging by the method
/// names, every word is rotated right by the number of bits given in the method's suffix —
/// confirm against the implementations.
pub trait RotateEachWord32 {
    fn rotate_each_word_right7(self) -> Self;
    fn rotate_each_word_right8(self) -> Self;
    fn rotate_each_word_right11(self) -> Self;
    fn rotate_each_word_right12(self) -> Self;
    fn rotate_each_word_right16(self) -> Self;
    fn rotate_each_word_right20(self) -> Self;
    fn rotate_each_word_right24(self) -> Self;
    fn rotate_each_word_right25(self) -> Self;
}
41
/// Additional fixed-amount rotation required by `BitOps64`.
///
/// Judging by the method name, every word is rotated right by 32 bits — confirm against the
/// implementations.
pub trait RotateEachWord64 {
    fn rotate_each_word_right32(self) -> Self;
}
45
/// Marker required by `BitOps128`; it declares no methods.
pub trait RotateEachWord128 {}
47
// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
//
// A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.
55
56use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
57
/// Unsafe counterpart of a `From`-style conversion: builds `Self` from a `T`.
///
/// The conditions a caller must uphold are not stated in this file; see each implementation
/// for its safety requirements.
// No `# Safety` section can be written here because the contract is implementation-specific,
// hence the lint is silenced.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Converts `t` into `Self`.
    unsafe fn unsafe_from(t: T) -> Self;
}
62
/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
    /// Returns the element selected by index `i`.
    ///
    /// NOTE(review): presumably `i` must be 0 or 1; behavior for other values is not specified
    /// here.
    fn extract(self, i: u32) -> W;
    /// Returns the vector with the element selected by index `i` set to `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
68
/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
    /// Returns the element selected by index `i`.
    ///
    /// NOTE(review): presumably `i` must be in `0..4`; behavior for other values is not
    /// specified here.
    fn extract(self, i: u32) -> W;
    /// Returns the vector with the element selected by index `i` set to `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Vec4 functions which may not be implemented yet for all Vec4 types.
/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
/// import Vec4Ext only together with Vec4, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Takes four vectors and returns four vectors of the same type.
    ///
    /// NOTE(review): presumably the inputs are treated as the rows of a 4x4 matrix of `W`
    /// elements and the outputs are its columns — confirm against the implementations.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// Conversion of a vector into a plain scalar container `T` (for example, `u32x4x4` requires
/// `Vector<[u32; 16]>`).
pub trait Vector<T> {
    /// Consumes the vector and returns its contents as `T`.
    fn to_scalars(self) -> T;
}
85
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// NOTE(review): the digits in each method name presumably spell out the resulting word
/// order; which end of the vector counts as index 0 is not visible here — confirm against the
/// implementations.
pub trait Words4 {
    fn shuffle1230(self) -> Self;
    fn shuffle2301(self) -> Self;
    fn shuffle3012(self) -> Self;
}
93
/// A vector composed of one or more lanes, each composed of four words.
///
/// NOTE(review): presumably each method applies the named word permutation independently
/// within every lane; the index convention is not visible here — confirm against the
/// implementations.
pub trait LaneWords4 {
    fn shuffle_lane_words1230(self) -> Self;
    fn shuffle_lane_words2301(self) -> Self;
    fn shuffle_lane_words3012(self) -> Self;
}
100
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The numeric suffix of each method is the size, in bits, of the ranges being exchanged.
pub trait Swap64 {
    fn swap1(self) -> Self;
    fn swap2(self) -> Self;
    fn swap4(self) -> Self;
    fn swap8(self) -> Self;
    fn swap16(self) -> Self;
    fn swap32(self) -> Self;
    fn swap64(self) -> Self;
}
112
113pub trait u32x4<M: Machine>:
114 BitOps32
115 + Store<vec128_storage>
116 + ArithOps
117 + Vec4<u32>
118 + Words4
119 + LaneWords4
120 + StoreBytes
121 + MultiLane<[u32; 4]>
122 + Into<vec128_storage>
123{
124}
125pub trait u64x2<M: Machine>:
126 BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage>
127{
128}
129pub trait u128x1<M: Machine>:
130 BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
131{
132}
133
134pub trait u32x4x2<M: Machine>:
135 BitOps32
136 + Store<vec256_storage>
137 + Vec2<M::u32x4>
138 + MultiLane<[M::u32x4; 2]>
139 + ArithOps
140 + Into<vec256_storage>
141 + StoreBytes
142{
143}
144pub trait u64x2x2<M: Machine>:
145 BitOps64
146 + Store<vec256_storage>
147 + Vec2<M::u64x2>
148 + MultiLane<[M::u64x2; 2]>
149 + ArithOps
150 + StoreBytes
151 + Into<vec256_storage>
152{
153}
154pub trait u64x4<M: Machine>:
155 BitOps64
156 + Store<vec256_storage>
157 + Vec4<u64>
158 + MultiLane<[u64; 4]>
159 + ArithOps
160 + Words4
161 + StoreBytes
162 + Into<vec256_storage>
163{
164}
165pub trait u128x2<M: Machine>:
166 BitOps128
167 + Store<vec256_storage>
168 + Vec2<M::u128x1>
169 + MultiLane<[M::u128x1; 2]>
170 + Swap64
171 + Into<vec256_storage>
172{
173}
174
175pub trait u32x4x4<M: Machine>:
176 BitOps32
177 + Store<vec512_storage>
178 + Vec4<M::u32x4>
179 + Vec4Ext<M::u32x4>
180 + Vector<[u32; 16]>
181 + MultiLane<[M::u32x4; 4]>
182 + ArithOps
183 + LaneWords4
184 + Into<vec512_storage>
185 + StoreBytes
186{
187}
188pub trait u64x2x4<M: Machine>:
189 BitOps64
190 + Store<vec512_storage>
191 + Vec4<M::u64x2>
192 + MultiLane<[M::u64x2; 4]>
193 + ArithOps
194 + Into<vec512_storage>
195{
196}
197// TODO: Words4
198pub trait u128x4<M: Machine>:
199 BitOps128
200 + Store<vec512_storage>
201 + Vec4<M::u128x1>
202 + MultiLane<[M::u128x1; 4]>
203 + Swap64
204 + Into<vec512_storage>
205{
206}
207
/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the container holding the individual lanes (the vector traits in this file use
/// fixed-size arrays such as `[M::u32x4; 2]`).
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;
    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}
215
/// Combine single vectors into a multi-lane vector.
///
/// Implemented on the collection of lanes; `V` is the resulting multi-lane vector type.
pub trait VZip<V> {
    /// Consumes the lanes and returns the combined vector.
    fn vzip(self) -> V;
}
220
221impl<V, T> VZip<V> for T
222where
223 V: MultiLane<T>,
224{
225 #[inline(always)]
226 fn vzip(self) -> V {
227 V::from_lanes(self)
228 }
229}
230
231pub trait Machine: Sized + Copy {
232 type u32x4: u32x4<Self>;
233 type u64x2: u64x2<Self>;
234 type u128x1: u128x1<Self>;
235
236 type u32x4x2: u32x4x2<Self>;
237 type u64x2x2: u64x2x2<Self>;
238 type u64x4: u64x4<Self>;
239 type u128x2: u128x2<Self>;
240
241 type u32x4x4: u32x4x4<Self>;
242 type u64x2x4: u64x2x4<Self>;
243 type u128x4: u128x4<Self>;
244
245 #[inline(always)]
246 fn unpack<S, V: Store<S>>(self, s: S) -> V {
247 unsafe { V::unpack(s) }
248 }
249
250 #[inline(always)]
251 fn vec<V, A>(self, a: A) -> V
252 where
253 V: MultiLane<A>,
254 {
255 V::from_lanes(a)
256 }
257
258 #[inline(always)]
259 fn read_le<V>(self, input: &[u8]) -> V
260 where
261 V: StoreBytes,
262 {
263 unsafe { V::unsafe_read_le(input) }
264 }
265
266 #[inline(always)]
267 fn read_be<V>(self, input: &[u8]) -> V
268 where
269 V: StoreBytes,
270 {
271 unsafe { V::unsafe_read_be(input) }
272 }
273
274 /// # Safety
275 /// Caller must ensure the type of Self is appropriate for the hardware of the execution
276 /// environment.
277 unsafe fn instance() -> Self;
278}
279
/// Construction of a vector from a storage representation `S` (the vector traits in this file
/// use `vec128_storage`, `vec256_storage` and `vec512_storage`).
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
286
/// Reading a vector from, and writing it to, byte slices in either byte order.
///
/// NOTE(review): the slice length each method expects, and what happens on a mismatch, are
/// not visible in this file — check the implementations.
pub trait StoreBytes {
    /// Reads `Self` from `input`; the `le` suffix presumably means little-endian words.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// Reads `Self` from `input`; the `be` suffix presumably means big-endian words.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    /// Writes `self` into `out`; counterpart of [`StoreBytes::unsafe_read_le`].
    fn write_le(self, out: &mut [u8]);
    /// Writes `self` into `out`; counterpart of [`StoreBytes::unsafe_read_be`].
    fn write_be(self, out: &mut [u8]);
}
299