| 1 | #![allow (non_camel_case_types)] |
| 2 | use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not}; |
| 3 | |
/// Fused bitwise "and-not" of two values of the same type.
///
/// NOTE(review): this declaration does not say which operand is complemented
/// (`!self & rhs` versus `self & !rhs`); confirm against the implementations.
pub trait AndNot {
    /// Type produced by [`AndNot::andnot`].
    type Output;

    /// Combines `self` and `rhs` with the and-not operation.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation.
///
/// NOTE(review): the granularity of the swap (per word or over the whole
/// value) is not visible from this declaration; confirm against implementors.
pub trait BSwap {
    /// Consumes `self` and returns the byte-swapped value.
    fn bswap(self) -> Self;
}
| 11 | /// Ops that depend on word size |
| 12 | pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {} |
| 13 | /// Ops that are independent of word size and endian |
| 14 | pub trait BitOps0: |
| 15 | BitAnd<Output = Self> |
| 16 | + BitOr<Output = Self> |
| 17 | + BitXor<Output = Self> |
| 18 | + BitXorAssign |
| 19 | + Not<Output = Self> |
| 20 | + AndNot<Output = Self> |
| 21 | + Sized |
| 22 | + Copy |
| 23 | + Clone |
| 24 | { |
| 25 | } |
| 26 | |
| 27 | pub trait BitOps32: BitOps0 + RotateEachWord32 {} |
| 28 | pub trait BitOps64: BitOps32 + RotateEachWord64 {} |
| 29 | pub trait BitOps128: BitOps64 + RotateEachWord128 {} |
| 30 | |
/// Fixed-amount rotations applied to each word of a value.
///
/// Each method consumes `self` and returns the rotated value; the numeric
/// suffix names the rotation amount.
///
/// NOTE(review): the names indicate a right rotation by that many bits within
/// each word; the word width itself is not fixed by this declaration.
pub trait RotateEachWord32 {
    fn rotate_each_word_right7(self) -> Self;
    fn rotate_each_word_right8(self) -> Self;
    fn rotate_each_word_right11(self) -> Self;
    fn rotate_each_word_right12(self) -> Self;
    fn rotate_each_word_right16(self) -> Self;
    fn rotate_each_word_right20(self) -> Self;
    fn rotate_each_word_right24(self) -> Self;
    fn rotate_each_word_right25(self) -> Self;
}
| 41 | |
/// Additional fixed-amount rotation for types with wider words.
///
/// NOTE(review): the name indicates a right rotation by 32 bits within each
/// word; the word width itself is not fixed by this declaration.
pub trait RotateEachWord64 {
    /// Consumes `self` and returns the rotated value.
    fn rotate_each_word_right32(self) -> Self;
}
| 45 | |
/// Marker trait completing the `RotateEachWord*` family; it currently declares
/// no methods.
pub trait RotateEachWord128 {}
| 47 | |
// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
//
// A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.
| 55 | |
| 56 | use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; |
| 57 | |
/// Unsafe counterpart of a `From`-style conversion from `T`.
///
/// NOTE(review): the safety contract is not stated at this declaration, which
/// is why the `missing_safety_doc` lint is silenced; each implementation
/// should document what its callers must uphold.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Builds `Self` from `t`.
    unsafe fn unsafe_from(t: T) -> Self;
}
| 62 | |
/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behavior for an out-of-range `i` is left to implementors.
    fn extract(self, i: u32) -> W;

    /// Returns a copy of `self` with the element at index `i` replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
| 68 | |
/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behavior for an out-of-range `i` is left to implementors.
    fn extract(self, i: u32) -> W;

    /// Returns a copy of `self` with the element at index `i` replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Vec4 functions which may not be implemented yet for all Vec4 types.
/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
/// import Vec4Ext only together with Vec4, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Takes four vectors and returns four vectors.
    ///
    /// NOTE(review): by its name this is a 4x4 transpose (output `k` gathers
    /// element `k` of each input); confirm against the implementations.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// Conversion of a vector into its scalar representation `T`.
pub trait Vector<T> {
    /// Consumes the vector and returns its contents as `T`.
    fn to_scalars(self) -> T;
}
| 85 | |
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// NOTE(review): the digit suffix of each method presumably names the word
/// order of the result; the exact convention is not visible here, so confirm
/// it against the implementations.
pub trait Words4 {
    fn shuffle1230(self) -> Self;
    fn shuffle2301(self) -> Self;
    fn shuffle3012(self) -> Self;
}
| 93 | |
/// A vector composed of one or more lanes, each composed of four words.
///
/// NOTE(review): the digit suffix of each method presumably names the word
/// order applied within every lane; the exact convention is not visible here,
/// so confirm it against the implementations.
pub trait LaneWords4 {
    fn shuffle_lane_words1230(self) -> Self;
    fn shuffle_lane_words2301(self) -> Self;
    fn shuffle_lane_words3012(self) -> Self;
}
| 100 | |
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The numeric suffix of each method is the size, in bits, of the ranges
/// that are exchanged.
pub trait Swap64 {
    fn swap1(self) -> Self;
    fn swap2(self) -> Self;
    fn swap4(self) -> Self;
    fn swap8(self) -> Self;
    fn swap16(self) -> Self;
    fn swap32(self) -> Self;
    fn swap64(self) -> Self;
}
| 112 | |
| 113 | pub trait u32x4<M: Machine>: |
| 114 | BitOps32 |
| 115 | + Store<vec128_storage> |
| 116 | + ArithOps |
| 117 | + Vec4<u32> |
| 118 | + Words4 |
| 119 | + LaneWords4 |
| 120 | + StoreBytes |
| 121 | + MultiLane<[u32; 4]> |
| 122 | + Into<vec128_storage> |
| 123 | { |
| 124 | } |
| 125 | pub trait u64x2<M: Machine>: |
| 126 | BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage> |
| 127 | { |
| 128 | } |
| 129 | pub trait u128x1<M: Machine>: |
| 130 | BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage> |
| 131 | { |
| 132 | } |
| 133 | |
| 134 | pub trait u32x4x2<M: Machine>: |
| 135 | BitOps32 |
| 136 | + Store<vec256_storage> |
| 137 | + Vec2<M::u32x4> |
| 138 | + MultiLane<[M::u32x4; 2]> |
| 139 | + ArithOps |
| 140 | + Into<vec256_storage> |
| 141 | + StoreBytes |
| 142 | { |
| 143 | } |
| 144 | pub trait u64x2x2<M: Machine>: |
| 145 | BitOps64 |
| 146 | + Store<vec256_storage> |
| 147 | + Vec2<M::u64x2> |
| 148 | + MultiLane<[M::u64x2; 2]> |
| 149 | + ArithOps |
| 150 | + StoreBytes |
| 151 | + Into<vec256_storage> |
| 152 | { |
| 153 | } |
| 154 | pub trait u64x4<M: Machine>: |
| 155 | BitOps64 |
| 156 | + Store<vec256_storage> |
| 157 | + Vec4<u64> |
| 158 | + MultiLane<[u64; 4]> |
| 159 | + ArithOps |
| 160 | + Words4 |
| 161 | + StoreBytes |
| 162 | + Into<vec256_storage> |
| 163 | { |
| 164 | } |
| 165 | pub trait u128x2<M: Machine>: |
| 166 | BitOps128 |
| 167 | + Store<vec256_storage> |
| 168 | + Vec2<M::u128x1> |
| 169 | + MultiLane<[M::u128x1; 2]> |
| 170 | + Swap64 |
| 171 | + Into<vec256_storage> |
| 172 | { |
| 173 | } |
| 174 | |
| 175 | pub trait u32x4x4<M: Machine>: |
| 176 | BitOps32 |
| 177 | + Store<vec512_storage> |
| 178 | + Vec4<M::u32x4> |
| 179 | + Vec4Ext<M::u32x4> |
| 180 | + Vector<[u32; 16]> |
| 181 | + MultiLane<[M::u32x4; 4]> |
| 182 | + ArithOps |
| 183 | + LaneWords4 |
| 184 | + Into<vec512_storage> |
| 185 | + StoreBytes |
| 186 | { |
| 187 | } |
| 188 | pub trait u64x2x4<M: Machine>: |
| 189 | BitOps64 |
| 190 | + Store<vec512_storage> |
| 191 | + Vec4<M::u64x2> |
| 192 | + MultiLane<[M::u64x2; 4]> |
| 193 | + ArithOps |
| 194 | + Into<vec512_storage> |
| 195 | { |
| 196 | } |
| 197 | // TODO: Words4 |
| 198 | pub trait u128x4<M: Machine>: |
| 199 | BitOps128 |
| 200 | + Store<vec512_storage> |
| 201 | + Vec4<M::u128x1> |
| 202 | + MultiLane<[M::u128x1; 4]> |
| 203 | + Swap64 |
| 204 | + Into<vec512_storage> |
| 205 | { |
| 206 | } |
| 207 | |
/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the decomposed form of the vector; the two methods convert
/// between that form and the vector itself.
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;

    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}
| 215 | |
/// Combine single vectors into a multi-lane vector.
///
/// `V` is the combined vector type produced from `self`.
pub trait VZip<V> {
    /// Consumes `self` and returns the combined vector.
    fn vzip(self) -> V;
}
| 220 | |
| 221 | impl<V, T> VZip<V> for T |
| 222 | where |
| 223 | V: MultiLane<T>, |
| 224 | { |
| 225 | #[inline (always)] |
| 226 | fn vzip(self) -> V { |
| 227 | V::from_lanes(self) |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | pub trait Machine: Sized + Copy { |
| 232 | type u32x4: u32x4<Self>; |
| 233 | type u64x2: u64x2<Self>; |
| 234 | type u128x1: u128x1<Self>; |
| 235 | |
| 236 | type u32x4x2: u32x4x2<Self>; |
| 237 | type u64x2x2: u64x2x2<Self>; |
| 238 | type u64x4: u64x4<Self>; |
| 239 | type u128x2: u128x2<Self>; |
| 240 | |
| 241 | type u32x4x4: u32x4x4<Self>; |
| 242 | type u64x2x4: u64x2x4<Self>; |
| 243 | type u128x4: u128x4<Self>; |
| 244 | |
| 245 | #[inline (always)] |
| 246 | fn unpack<S, V: Store<S>>(self, s: S) -> V { |
| 247 | unsafe { V::unpack(s) } |
| 248 | } |
| 249 | |
| 250 | #[inline (always)] |
| 251 | fn vec<V, A>(self, a: A) -> V |
| 252 | where |
| 253 | V: MultiLane<A>, |
| 254 | { |
| 255 | V::from_lanes(a) |
| 256 | } |
| 257 | |
| 258 | #[inline (always)] |
| 259 | fn read_le<V>(self, input: &[u8]) -> V |
| 260 | where |
| 261 | V: StoreBytes, |
| 262 | { |
| 263 | unsafe { V::unsafe_read_le(input) } |
| 264 | } |
| 265 | |
| 266 | #[inline (always)] |
| 267 | fn read_be<V>(self, input: &[u8]) -> V |
| 268 | where |
| 269 | V: StoreBytes, |
| 270 | { |
| 271 | unsafe { V::unsafe_read_be(input) } |
| 272 | } |
| 273 | |
| 274 | /// # Safety |
| 275 | /// Caller must ensure the type of Self is appropriate for the hardware of the execution |
| 276 | /// environment. |
| 277 | unsafe fn instance() -> Self; |
| 278 | } |
| 279 | |
/// Construction of a vector from its storage representation `S`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
| 286 | |
/// Byte-slice load and store for a vector type.
///
/// NOTE(review): the `_le` / `_be` suffixes presumably select little- and
/// big-endian byte order, and any requirement on the slice length is not
/// stated at this declaration; confirm both against the implementations.
pub trait StoreBytes {
    /// Reads a value from `input`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;

    /// Reads a value from `input`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;

    /// Writes the value into `out`.
    fn write_le(self, out: &mut [u8]);

    /// Writes the value into `out`.
    fn write_be(self, out: &mut [u8]);
}
| 299 | |