1 | #![allow (non_camel_case_types)] |
2 | use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not}; |
3 | |
/// Binary "and-not" operation on two values of the same type.
///
/// NOTE(review): which operand is complemented is decided by each implementor and is not
/// visible in this declaration — confirm before relying on operand order.
pub trait AndNot {
    /// Result type produced by [`AndNot::andnot`].
    type Output;

    /// Combine `self` with `rhs`, consuming both operands.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Types offering a `bswap` transformation that maps a value to another value of the same type.
///
/// NOTE(review): presumably a byte-order reversal, going by the name — confirm in the impls.
pub trait BSwap {
    /// Consume `self` and return the swapped value.
    fn bswap(self) -> Self;
}
11 | /// Ops that depend on word size |
12 | pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {} |
13 | /// Ops that are independent of word size and endian |
14 | pub trait BitOps0: |
15 | BitAnd<Output = Self> |
16 | + BitOr<Output = Self> |
17 | + BitXor<Output = Self> |
18 | + BitXorAssign |
19 | + Not<Output = Self> |
20 | + AndNot<Output = Self> |
21 | + Sized |
22 | + Copy |
23 | + Clone |
24 | { |
25 | } |
26 | |
27 | pub trait BitOps32: BitOps0 + RotateEachWord32 {} |
28 | pub trait BitOps64: BitOps32 + RotateEachWord64 {} |
29 | pub trait BitOps128: BitOps64 + RotateEachWord128 {} |
30 | |
/// Fixed-amount right rotations applied to each word of a value.
///
/// The rotation amount is the numeric suffix of each method name (presumably a bit count —
/// confirm in the impls). Only the amounts listed here are offered.
pub trait RotateEachWord32 {
    /// Rotate each word right by 7.
    fn rotate_each_word_right7(self) -> Self;

    /// Rotate each word right by 8.
    fn rotate_each_word_right8(self) -> Self;

    /// Rotate each word right by 11.
    fn rotate_each_word_right11(self) -> Self;

    /// Rotate each word right by 12.
    fn rotate_each_word_right12(self) -> Self;

    /// Rotate each word right by 16.
    fn rotate_each_word_right16(self) -> Self;

    /// Rotate each word right by 20.
    fn rotate_each_word_right20(self) -> Self;

    /// Rotate each word right by 24.
    fn rotate_each_word_right24(self) -> Self;

    /// Rotate each word right by 25.
    fn rotate_each_word_right25(self) -> Self;
}
41 | |
/// Additional fixed-amount right rotation for values whose words are wide enough for it.
///
/// The rotation amount is the numeric suffix of the method name (presumably a bit count —
/// confirm in the impls).
pub trait RotateEachWord64 {
    /// Rotate each word right by 32.
    fn rotate_each_word_right32(self) -> Self;
}
45 | |
/// Marker for the 128-bit-word tier of the rotation traits; it declares no methods.
pub trait RotateEachWord128 {}
47 | |
48 | // Vector type naming scheme: |
49 | // uN[xP]xL |
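// Unsigned; N-bit words * P words per lane * L lanes. When `xP` is omitted, the final number
// counts words rather than lanes (e.g. `u32x4` is four 32-bit words, `u64x4` is four 64-bit
// words spread over two lanes).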
51 | // |
52 | // A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of |
53 | // wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and |
54 | // slow inter-lane operations. |
55 | |
56 | use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; |
57 | |
/// Unsafe counterpart of a `From`-style conversion: builds `Self` out of a `T`.
///
/// NOTE(review): the safety contract callers must uphold is not written down here (the
/// clippy lint asking for a `# Safety` section is silenced instead) — consult the impls.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Convert `t` into `Self`.
    unsafe fn unsafe_from(t: T) -> Self;
}
62 | |
/// A vector composed of two elements of type `W`, which may be words or themselves vectors.
///
/// Handling of an out-of-range index `i` is left to the implementor.
pub trait Vec2<W> {
    /// Return the element selected by index `i`.
    fn extract(self, i: u32) -> W;

    /// Return the vector obtained by placing `w` at index `i`.
    fn insert(self, w: W, i: u32) -> Self;
}
68 | |
/// A vector composed of four elements of type `W`, which may be words or themselves vectors.
///
/// Handling of an out-of-range index `i` is left to the implementor.
pub trait Vec4<W> {
    /// Return the element selected by index `i`.
    fn extract(self, i: u32) -> W;

    /// Return the vector obtained by placing `w` at index `i`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Extra four-element-vector functions that may not yet be implemented for every `Vec4` type.
///
/// NOTE: functions in this trait may be moved to `Vec4` in any patch release. To avoid
/// breakage, import `Vec4Ext` only together with `Vec4`, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Transpose the four input vectors `a`, `b`, `c`, `d`, returning four vectors.
    ///
    /// Available only when `Self` is `Sized`, since values are passed and returned by value.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// A vector that can be converted, by value, into a scalar representation of type `T`.
pub trait Vector<T> {
    /// Consume the vector and return its contents as `T`.
    fn to_scalars(self) -> T;
}
85 | |
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// Each method permutes the four words according to the digit pattern in its name; the exact
/// index convention is defined by the implementors.
pub trait Words4 {
    /// Apply the `1230` word permutation.
    fn shuffle1230(self) -> Self;

    /// Apply the `2301` word permutation.
    fn shuffle2301(self) -> Self;

    /// Apply the `3012` word permutation.
    fn shuffle3012(self) -> Self;
}
93 | |
/// A vector composed of one or more lanes, each composed of four words.
///
/// Each method permutes the words of a lane according to the digit pattern in its name; the
/// exact index convention is defined by the implementors.
pub trait LaneWords4 {
    /// Apply the `1230` word permutation within each lane.
    fn shuffle_lane_words1230(self) -> Self;

    /// Apply the `2301` word permutation within each lane.
    fn shuffle_lane_words2301(self) -> Self;

    /// Apply the `3012` word permutation within each lane.
    fn shuffle_lane_words3012(self) -> Self;
}
100 | |
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The range size is the numeric suffix of each method name.
pub trait Swap64 {
    /// Exchange neighboring 1-bit ranges.
    fn swap1(self) -> Self;

    /// Exchange neighboring 2-bit ranges.
    fn swap2(self) -> Self;

    /// Exchange neighboring 4-bit ranges.
    fn swap4(self) -> Self;

    /// Exchange neighboring 8-bit ranges.
    fn swap8(self) -> Self;

    /// Exchange neighboring 16-bit ranges.
    fn swap16(self) -> Self;

    /// Exchange neighboring 32-bit ranges.
    fn swap32(self) -> Self;

    /// Exchange neighboring 64-bit ranges.
    fn swap64(self) -> Self;
}
112 | |
113 | pub trait u32x4<M: Machine>: |
114 | BitOps32 |
115 | + Store<vec128_storage> |
116 | + ArithOps |
117 | + Vec4<u32> |
118 | + Words4 |
119 | + LaneWords4 |
120 | + StoreBytes |
121 | + MultiLane<[u32; 4]> |
122 | + Into<vec128_storage> |
123 | { |
124 | } |
125 | pub trait u64x2<M: Machine>: |
126 | BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage> |
127 | { |
128 | } |
129 | pub trait u128x1<M: Machine>: |
130 | BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage> |
131 | { |
132 | } |
133 | |
134 | pub trait u32x4x2<M: Machine>: |
135 | BitOps32 |
136 | + Store<vec256_storage> |
137 | + Vec2<M::u32x4> |
138 | + MultiLane<[M::u32x4; 2]> |
139 | + ArithOps |
140 | + Into<vec256_storage> |
141 | + StoreBytes |
142 | { |
143 | } |
144 | pub trait u64x2x2<M: Machine>: |
145 | BitOps64 |
146 | + Store<vec256_storage> |
147 | + Vec2<M::u64x2> |
148 | + MultiLane<[M::u64x2; 2]> |
149 | + ArithOps |
150 | + StoreBytes |
151 | + Into<vec256_storage> |
152 | { |
153 | } |
154 | pub trait u64x4<M: Machine>: |
155 | BitOps64 |
156 | + Store<vec256_storage> |
157 | + Vec4<u64> |
158 | + MultiLane<[u64; 4]> |
159 | + ArithOps |
160 | + Words4 |
161 | + StoreBytes |
162 | + Into<vec256_storage> |
163 | { |
164 | } |
165 | pub trait u128x2<M: Machine>: |
166 | BitOps128 |
167 | + Store<vec256_storage> |
168 | + Vec2<M::u128x1> |
169 | + MultiLane<[M::u128x1; 2]> |
170 | + Swap64 |
171 | + Into<vec256_storage> |
172 | { |
173 | } |
174 | |
175 | pub trait u32x4x4<M: Machine>: |
176 | BitOps32 |
177 | + Store<vec512_storage> |
178 | + Vec4<M::u32x4> |
179 | + Vec4Ext<M::u32x4> |
180 | + Vector<[u32; 16]> |
181 | + MultiLane<[M::u32x4; 4]> |
182 | + ArithOps |
183 | + LaneWords4 |
184 | + Into<vec512_storage> |
185 | + StoreBytes |
186 | { |
187 | } |
188 | pub trait u64x2x4<M: Machine>: |
189 | BitOps64 |
190 | + Store<vec512_storage> |
191 | + Vec4<M::u64x2> |
192 | + MultiLane<[M::u64x2; 4]> |
193 | + ArithOps |
194 | + Into<vec512_storage> |
195 | { |
196 | } |
197 | // TODO: Words4 |
198 | pub trait u128x4<M: Machine>: |
199 | BitOps128 |
200 | + Store<vec512_storage> |
201 | + Vec4<M::u128x1> |
202 | + MultiLane<[M::u128x1; 4]> |
203 | + Swap64 |
204 | + Into<vec512_storage> |
205 | { |
206 | } |
207 | |
/// A vector composed of multiple 128-bit lanes, convertible to and from a `Lanes` value.
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;

    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}
215 | |
/// Combine single vectors into a multi-lane vector of type `V`.
pub trait VZip<V> {
    /// Consume `self` and return the combined vector.
    fn vzip(self) -> V;
}
220 | |
221 | impl<V, T> VZip<V> for T |
222 | where |
223 | V: MultiLane<T>, |
224 | { |
225 | #[inline (always)] |
226 | fn vzip(self) -> V { |
227 | V::from_lanes(self) |
228 | } |
229 | } |
230 | |
231 | pub trait Machine: Sized + Copy { |
232 | type u32x4: u32x4<Self>; |
233 | type u64x2: u64x2<Self>; |
234 | type u128x1: u128x1<Self>; |
235 | |
236 | type u32x4x2: u32x4x2<Self>; |
237 | type u64x2x2: u64x2x2<Self>; |
238 | type u64x4: u64x4<Self>; |
239 | type u128x2: u128x2<Self>; |
240 | |
241 | type u32x4x4: u32x4x4<Self>; |
242 | type u64x2x4: u64x2x4<Self>; |
243 | type u128x4: u128x4<Self>; |
244 | |
245 | #[inline (always)] |
246 | fn unpack<S, V: Store<S>>(self, s: S) -> V { |
247 | unsafe { V::unpack(s) } |
248 | } |
249 | |
250 | #[inline (always)] |
251 | fn vec<V, A>(self, a: A) -> V |
252 | where |
253 | V: MultiLane<A>, |
254 | { |
255 | V::from_lanes(a) |
256 | } |
257 | |
258 | #[inline (always)] |
259 | fn read_le<V>(self, input: &[u8]) -> V |
260 | where |
261 | V: StoreBytes, |
262 | { |
263 | unsafe { V::unsafe_read_le(input) } |
264 | } |
265 | |
266 | #[inline (always)] |
267 | fn read_be<V>(self, input: &[u8]) -> V |
268 | where |
269 | V: StoreBytes, |
270 | { |
271 | unsafe { V::unsafe_read_be(input) } |
272 | } |
273 | |
274 | /// # Safety |
275 | /// Caller must ensure the type of Self is appropriate for the hardware of the execution |
276 | /// environment. |
277 | unsafe fn instance() -> Self; |
278 | } |
279 | |
/// Types that can be constructed from a storage value of type `S`.
pub trait Store<S> {
    /// Build `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
286 | |
/// Types that can be read from and written to byte slices in either byte order.
///
/// NOTE(review): required slice lengths are not stated in this declaration — check the impls.
pub trait StoreBytes {
    /// Read `Self` from `input`, interpreting the bytes as little-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;

    /// Read `Self` from `input`, interpreting the bytes as big-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;

    /// Write `self` into `out` in little-endian byte order.
    fn write_le(self, out: &mut [u8]);

    /// Write `self` into `out` in big-endian byte order.
    fn write_be(self, out: &mut [u8]);
}
299 | |