1 | use crate::simd::{ |
2 | cmp::SimdPartialOrd, |
3 | num::SimdUint, |
4 | ptr::{SimdConstPtr, SimdMutPtr}, |
5 | LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, |
6 | }; |
7 | |
/// A SIMD vector with the shape of `[T; N]` but the operations of `T`.
///
/// `Simd<T, N>` supports the operators (+, *, etc.) that `T` does in "elementwise" fashion.
/// These take the element at each index from the left-hand side and right-hand side,
/// perform the operation, then return the result in the same index in a vector of equal size.
/// However, `Simd` differs from normal iteration and normal arrays:
/// - `Simd<T, N>` executes `N` operations in a single step with no `break`s
/// - `Simd<T, N>` can have an alignment greater than `T`, for better mechanical sympathy
///
/// By always imposing these constraints on `Simd`, it is easier to compile elementwise operations
/// into machine instructions that can themselves be executed in parallel.
///
/// ```rust
/// # #![feature(portable_simd)]
/// # use core::simd::{Simd};
/// # use core::array;
/// let a: [i32; 4] = [-2, 0, 2, 4];
/// let b = [10, 9, 8, 7];
/// let sum = array::from_fn(|i| a[i] + b[i]);
/// let prod = array::from_fn(|i| a[i] * b[i]);
///
/// // `Simd<T, N>` implements `From<[T; N]>`
/// let (v, w) = (Simd::from(a), Simd::from(b));
/// // Which means arrays implement `Into<Simd<T, N>>`.
/// assert_eq!(v + w, sum.into());
/// assert_eq!(v * w, prod.into());
/// ```
///
/// `Simd` with integer elements treats operators as wrapping, as if `T` was [`Wrapping<T>`].
/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
/// This means there is no warning on overflows, even in "debug" builds.
/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
/// and even "debug builds" are unlikely to tolerate the loss of performance.
/// You may want to consider using explicitly checked arithmetic if such is required.
/// Division by zero on integers still causes a panic, so
/// you may want to consider using `f32` or `f64` if that is unacceptable.
///
/// [`Wrapping<T>`]: core::num::Wrapping
///
/// # Layout
/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), with a greater alignment.
/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
/// Thus it is sound to [`transmute`] `Simd<T, N>` to `[T; N]` and should optimize to "zero cost",
/// but the reverse transmutation may require a copy the compiler cannot simply elide.
///
/// # ABI "Features"
/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed and returned via memory,
/// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept
/// `Simd<T, N>` or return it is recommended, at the cost of code generation time, as
/// inlining SIMD-using functions can omit a large function prolog or epilog and thus
/// improve both speed and code size. The need for this may be corrected in the future.
///
/// Using `#[inline(always)]` still requires additional care.
///
/// # Safe SIMD with Unsafe Rust
///
/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
/// [`read_unaligned`] and [`write_unaligned`]. This is because:
/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
/// - `Simd<T, N>` is often read from or written to [`[T]`](slice) and other types aligned to `T`
/// - combining these actions violates the `unsafe` contract and explodes the program into
///   a puff of **undefined behavior**
/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned
///   if it sees the optimization
/// - most contemporary processors with "aligned" and "unaligned" read and write instructions
///   exhibit no performance difference if the "unaligned" variant is aligned at runtime
///
/// Fewer obligations mean unaligned reads and writes are less likely to make the program unsound,
/// and may be just as fast as stricter alternatives.
/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for
/// converting `[T]` to `[Simd<T, N>]`, and allows soundly operating on an aligned SIMD body,
/// but it may cost more time when handling the scalar head and tail.
/// If these are not enough, it is best to design data structures to be already aligned
/// to `mem::align_of::<Simd<T, N>>()` before using `unsafe` Rust to read or write.
/// Other ways to compensate for these facts, like materializing `Simd` to or from an array first,
/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
///
/// [`transmute`]: core::mem::transmute
/// [raw pointers]: pointer
/// [`read_unaligned`]: pointer::read_unaligned
/// [`write_unaligned`]: pointer::write_unaligned
/// [`read`]: pointer::read
/// [`write`]: pointer::write
/// [as_simd]: slice::as_simd
//
// NOTE: Accessing the inner array directly in any way (e.g. by using the `.0` field syntax) or
// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
// causes rustc to emit illegal LLVM IR in some cases.
#[repr (simd)]
pub struct Simd<T, const N: usize>([T; N])
where
    LaneCount<N>: SupportedLaneCount,
    T: SimdElement;
107 | |
108 | impl<T, const N: usize> Simd<T, N> |
109 | where |
110 | LaneCount<N>: SupportedLaneCount, |
111 | T: SimdElement, |
112 | { |
/// Number of elements in this vector.
///
/// This is always equal to the const generic parameter `N`.
pub const LEN: usize = N;
115 | |
116 | /// Returns the number of elements in this SIMD vector. |
117 | /// |
118 | /// # Examples |
119 | /// |
120 | /// ``` |
121 | /// # #![feature (portable_simd)] |
122 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
123 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
124 | /// # use simd::u32x4; |
125 | /// let v = u32x4::splat(0); |
126 | /// assert_eq!(v.len(), 4); |
127 | /// ``` |
128 | #[inline ] |
129 | #[allow (clippy::len_without_is_empty)] |
130 | pub const fn len(&self) -> usize { |
131 | Self::LEN |
132 | } |
133 | |
134 | /// Constructs a new SIMD vector with all elements set to the given value. |
135 | /// |
136 | /// # Examples |
137 | /// |
138 | /// ``` |
139 | /// # #![feature (portable_simd)] |
140 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
141 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
142 | /// # use simd::u32x4; |
143 | /// let v = u32x4::splat(8); |
144 | /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); |
145 | /// ``` |
146 | #[inline ] |
147 | pub fn splat(value: T) -> Self { |
148 | // This is preferred over `[value; N]`, since it's explicitly a splat: |
149 | // https://github.com/rust-lang/rust/issues/97804 |
150 | struct Splat; |
151 | impl<const N: usize> Swizzle<N> for Splat { |
152 | const INDEX: [usize; N] = [0; N]; |
153 | } |
154 | Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value])) |
155 | } |
156 | |
157 | /// Returns an array reference containing the entire SIMD vector. |
158 | /// |
159 | /// # Examples |
160 | /// |
161 | /// ``` |
162 | /// # #![feature (portable_simd)] |
163 | /// # use core::simd::{Simd, u64x4}; |
164 | /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]); |
165 | /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); |
166 | /// ``` |
167 | #[inline ] |
168 | pub const fn as_array(&self) -> &[T; N] { |
169 | // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with |
170 | // potential padding at the end, so pointer casting to a |
171 | // `&[T; N]` is safe. |
172 | // |
173 | // NOTE: This deliberately doesn't just use `&self.0`, see the comment |
174 | // on the struct definition for details. |
175 | unsafe { &*(self as *const Self as *const [T; N]) } |
176 | } |
177 | |
178 | /// Returns a mutable array reference containing the entire SIMD vector. |
179 | #[inline ] |
180 | pub fn as_mut_array(&mut self) -> &mut [T; N] { |
181 | // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with |
182 | // potential padding at the end, so pointer casting to a |
183 | // `&mut [T; N]` is safe. |
184 | // |
185 | // NOTE: This deliberately doesn't just use `&mut self.0`, see the comment |
186 | // on the struct definition for details. |
187 | unsafe { &mut *(self as *mut Self as *mut [T; N]) } |
188 | } |
189 | |
/// Load a vector from an array of `T`.
///
/// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing).
/// With padding, `read_unaligned` will read past the end of an array of N elements.
///
/// The array behind `ptr` is copied into an uninitialized `Self`, which is then assumed
/// initialized and returned.
///
/// # Safety
/// Reading `ptr` must be safe, as if by `<*const [T; N]>::read`.
#[inline ]
const unsafe fn load(ptr: *const [T; N]) -> Self {
    // There are potentially simpler ways to write this function, but this should result in
    // LLVM `load <N x T>`

    let mut tmp = core::mem::MaybeUninit::<Self>::uninit();
    // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. It may have padding
    // which does not need to be initialized. The safety of reading `ptr` is ensured by the
    // caller.
    unsafe {
        // The destination is cast to the source's pointee type (`[T; N]`), so exactly one
        // array is copied rather than one (possibly padded) vector.
        core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr().cast(), 1);
        tmp.assume_init()
    }
}
211 | |
/// Store a vector to an array of `T`.
///
/// See `load` as to why this function is necessary.
///
/// Exactly one `[T; N]` is copied from the vector's array view to `ptr`.
///
/// # Safety
/// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write`.
#[inline ]
const unsafe fn store(self, ptr: *mut [T; N]) {
    // There are potentially simpler ways to write this function, but this should result in
    // LLVM `store <N x T>`

    // Creating a temporary helps LLVM turn the memcpy into a store.
    let tmp = self;
    // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. The safety of writing
    // `ptr` is ensured by the caller.
    unsafe { core::ptr::copy_nonoverlapping(tmp.as_array(), ptr, 1) }
}
229 | |
230 | /// Converts an array to a SIMD vector. |
231 | #[inline ] |
232 | pub const fn from_array(array: [T; N]) -> Self { |
233 | // SAFETY: `&array` is safe to read. |
234 | // |
235 | // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)` |
236 | // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). |
237 | // |
238 | // NOTE: This deliberately doesn't just use `Self(array)`, see the comment |
239 | // on the struct definition for details. |
240 | unsafe { Self::load(&array) } |
241 | } |
242 | |
243 | /// Converts a SIMD vector to an array. |
244 | #[inline ] |
245 | pub const fn to_array(self) -> [T; N] { |
246 | let mut tmp = core::mem::MaybeUninit::uninit(); |
247 | // SAFETY: writing to `tmp` is safe and initializes it. |
248 | // |
249 | // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)` |
250 | // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). |
251 | // |
252 | // NOTE: This deliberately doesn't just use `self.0`, see the comment |
253 | // on the struct definition for details. |
254 | unsafe { |
255 | self.store(tmp.as_mut_ptr()); |
256 | tmp.assume_init() |
257 | } |
258 | } |
259 | |
260 | /// Converts a slice to a SIMD vector containing `slice[..N]`. |
261 | /// |
262 | /// # Panics |
263 | /// |
264 | /// Panics if the slice's length is less than the vector's `Simd::N`. |
265 | /// Use `load_or_default` for an alternative that does not panic. |
266 | /// |
267 | /// # Example |
268 | /// |
269 | /// ``` |
270 | /// # #![feature (portable_simd)] |
271 | /// # use core::simd::u32x4; |
272 | /// let source = vec![1, 2, 3, 4, 5, 6]; |
273 | /// let v = u32x4::from_slice(&source); |
274 | /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); |
275 | /// ``` |
276 | #[must_use ] |
277 | #[inline ] |
278 | #[track_caller ] |
279 | pub const fn from_slice(slice: &[T]) -> Self { |
280 | assert!( |
281 | slice.len() >= Self::LEN, |
282 | "slice length must be at least the number of elements" |
283 | ); |
284 | // SAFETY: We just checked that the slice contains |
285 | // at least `N` elements. |
286 | unsafe { Self::load(slice.as_ptr().cast()) } |
287 | } |
288 | |
289 | /// Writes a SIMD vector to the first `N` elements of a slice. |
290 | /// |
291 | /// # Panics |
292 | /// |
293 | /// Panics if the slice's length is less than the vector's `Simd::N`. |
294 | /// |
295 | /// # Example |
296 | /// |
297 | /// ``` |
298 | /// # #![feature (portable_simd)] |
299 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
300 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
301 | /// # use simd::u32x4; |
302 | /// let mut dest = vec![0; 6]; |
303 | /// let v = u32x4::from_array([1, 2, 3, 4]); |
304 | /// v.copy_to_slice(&mut dest); |
305 | /// assert_eq!(&dest, &[1, 2, 3, 4, 0, 0]); |
306 | /// ``` |
307 | #[inline ] |
308 | #[track_caller ] |
309 | pub fn copy_to_slice(self, slice: &mut [T]) { |
310 | assert!( |
311 | slice.len() >= Self::LEN, |
312 | "slice length must be at least the number of elements" |
313 | ); |
314 | // SAFETY: We just checked that the slice contains |
315 | // at least `N` elements. |
316 | unsafe { self.store(slice.as_mut_ptr().cast()) } |
317 | } |
318 | |
319 | /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for |
320 | /// the `slice`. Otherwise, the default value for the element type is returned. |
321 | /// |
322 | /// # Examples |
323 | /// ``` |
324 | /// # #![feature (portable_simd)] |
325 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
326 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
327 | /// # use simd::Simd; |
328 | /// let vec: Vec<i32> = vec![10, 11]; |
329 | /// |
330 | /// let result = Simd::<i32, 4>::load_or_default(&vec); |
331 | /// assert_eq!(result, Simd::from_array([10, 11, 0, 0])); |
332 | /// ``` |
333 | #[must_use ] |
334 | #[inline ] |
335 | pub fn load_or_default(slice: &[T]) -> Self |
336 | where |
337 | T: Default, |
338 | { |
339 | Self::load_or(slice, Default::default()) |
340 | } |
341 | |
342 | /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for |
343 | /// the `slice`. Otherwise, the corresponding value from `or` is passed through. |
344 | /// |
345 | /// # Examples |
346 | /// ``` |
347 | /// # #![feature (portable_simd)] |
348 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
349 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
350 | /// # use simd::Simd; |
351 | /// let vec: Vec<i32> = vec![10, 11]; |
352 | /// let or = Simd::from_array([-5, -4, -3, -2]); |
353 | /// |
354 | /// let result = Simd::load_or(&vec, or); |
355 | /// assert_eq!(result, Simd::from_array([10, 11, -3, -2])); |
356 | /// ``` |
357 | #[must_use ] |
358 | #[inline ] |
359 | pub fn load_or(slice: &[T], or: Self) -> Self { |
360 | Self::load_select(slice, Mask::splat(true), or) |
361 | } |
362 | |
363 | /// Reads contiguous elements from `slice`. Each element is read from memory if its |
364 | /// corresponding element in `enable` is `true`. |
365 | /// |
366 | /// When the element is disabled or out of bounds for the slice, that memory location |
367 | /// is not accessed and the corresponding value from `or` is passed through. |
368 | /// |
369 | /// # Examples |
370 | /// ``` |
371 | /// # #![feature (portable_simd)] |
372 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
373 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
374 | /// # use simd::{Simd, Mask}; |
375 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
376 | /// let enable = Mask::from_array([true, true, false, true]); |
377 | /// let or = Simd::from_array([-5, -4, -3, -2]); |
378 | /// |
379 | /// let result = Simd::load_select(&vec, enable, or); |
380 | /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); |
381 | /// ``` |
382 | #[must_use ] |
383 | #[inline ] |
384 | pub fn load_select_or_default(slice: &[T], enable: Mask<<T as SimdElement>::Mask, N>) -> Self |
385 | where |
386 | T: Default, |
387 | { |
388 | Self::load_select(slice, enable, Default::default()) |
389 | } |
390 | |
391 | /// Reads contiguous elements from `slice`. Each element is read from memory if its |
392 | /// corresponding element in `enable` is `true`. |
393 | /// |
394 | /// When the element is disabled or out of bounds for the slice, that memory location |
395 | /// is not accessed and the corresponding value from `or` is passed through. |
396 | /// |
397 | /// # Examples |
398 | /// ``` |
399 | /// # #![feature (portable_simd)] |
400 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
401 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
402 | /// # use simd::{Simd, Mask}; |
403 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
404 | /// let enable = Mask::from_array([true, true, false, true]); |
405 | /// let or = Simd::from_array([-5, -4, -3, -2]); |
406 | /// |
407 | /// let result = Simd::load_select(&vec, enable, or); |
408 | /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); |
409 | /// ``` |
410 | #[must_use ] |
411 | #[inline ] |
412 | pub fn load_select( |
413 | slice: &[T], |
414 | mut enable: Mask<<T as SimdElement>::Mask, N>, |
415 | or: Self, |
416 | ) -> Self { |
417 | enable &= mask_up_to(slice.len()); |
418 | // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to |
419 | // the element. |
420 | unsafe { Self::load_select_ptr(slice.as_ptr(), enable, or) } |
421 | } |
422 | |
423 | /// Reads contiguous elements from `slice`. Each element is read from memory if its |
424 | /// corresponding element in `enable` is `true`. |
425 | /// |
426 | /// When the element is disabled, that memory location is not accessed and the corresponding |
427 | /// value from `or` is passed through. |
428 | #[must_use ] |
429 | #[inline ] |
430 | pub unsafe fn load_select_unchecked( |
431 | slice: &[T], |
432 | enable: Mask<<T as SimdElement>::Mask, N>, |
433 | or: Self, |
434 | ) -> Self { |
435 | let ptr = slice.as_ptr(); |
436 | // SAFETY: The safety of reading elements from `slice` is ensured by the caller. |
437 | unsafe { Self::load_select_ptr(ptr, enable, or) } |
438 | } |
439 | |
440 | /// Reads contiguous elements starting at `ptr`. Each element is read from memory if its |
441 | /// corresponding element in `enable` is `true`. |
442 | /// |
443 | /// When the element is disabled, that memory location is not accessed and the corresponding |
444 | /// value from `or` is passed through. |
445 | #[must_use ] |
446 | #[inline ] |
447 | pub unsafe fn load_select_ptr( |
448 | ptr: *const T, |
449 | enable: Mask<<T as SimdElement>::Mask, N>, |
450 | or: Self, |
451 | ) -> Self { |
452 | // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. |
453 | unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) } |
454 | } |
455 | |
456 | /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. |
457 | /// If an index is out-of-bounds, the element is instead selected from the `or` vector. |
458 | /// |
459 | /// # Examples |
460 | /// ``` |
461 | /// # #![feature (portable_simd)] |
462 | /// # use core::simd::Simd; |
463 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
464 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds |
465 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
466 | /// |
467 | /// let result = Simd::gather_or(&vec, idxs, alt); |
468 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15])); |
469 | /// ``` |
470 | #[must_use ] |
471 | #[inline ] |
472 | pub fn gather_or(slice: &[T], idxs: Simd<usize, N>, or: Self) -> Self { |
473 | Self::gather_select(slice, Mask::splat(true), idxs, or) |
474 | } |
475 | |
476 | /// Reads from indices in `slice` to construct a SIMD vector. |
477 | /// If an index is out-of-bounds, the element is set to the default given by `T: Default`. |
478 | /// |
479 | /// # Examples |
480 | /// ``` |
481 | /// # #![feature (portable_simd)] |
482 | /// # use core::simd::Simd; |
483 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
484 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds |
485 | /// |
486 | /// let result = Simd::gather_or_default(&vec, idxs); |
487 | /// assert_eq!(result, Simd::from_array([0, 13, 10, 15])); |
488 | /// ``` |
489 | #[must_use ] |
490 | #[inline ] |
491 | pub fn gather_or_default(slice: &[T], idxs: Simd<usize, N>) -> Self |
492 | where |
493 | T: Default, |
494 | { |
495 | Self::gather_or(slice, idxs, Self::splat(T::default())) |
496 | } |
497 | |
498 | /// Reads from indices in `slice` to construct a SIMD vector. |
499 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
500 | /// If an index is disabled or is out-of-bounds, the element is selected from the `or` vector. |
501 | /// |
502 | /// # Examples |
503 | /// ``` |
504 | /// # #![feature (portable_simd)] |
505 | /// # use core::simd::{Simd, Mask}; |
506 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
507 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index |
508 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
509 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
510 | /// |
511 | /// let result = Simd::gather_select(&vec, enable, idxs, alt); |
512 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); |
513 | /// ``` |
514 | #[must_use ] |
515 | #[inline ] |
516 | pub fn gather_select( |
517 | slice: &[T], |
518 | enable: Mask<isize, N>, |
519 | idxs: Simd<usize, N>, |
520 | or: Self, |
521 | ) -> Self { |
522 | let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len())); |
523 | // Safety: We have masked-off out-of-bounds indices. |
524 | unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } |
525 | } |
526 | |
527 | /// Reads from indices in `slice` to construct a SIMD vector. |
528 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
529 | /// If an index is disabled, the element is selected from the `or` vector. |
530 | /// |
531 | /// # Safety |
532 | /// |
533 | /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]* |
534 | /// even if the resulting value is not used. |
535 | /// |
536 | /// # Examples |
537 | /// ``` |
538 | /// # #![feature (portable_simd)] |
539 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
540 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
541 | /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; |
542 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
543 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index |
544 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
545 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
546 | /// // If this mask was used to gather, it would be unsound. Let's fix that. |
547 | /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); |
548 | /// |
549 | /// // The out-of-bounds index has been masked, so it's safe to gather now. |
550 | /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) }; |
551 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); |
552 | /// ``` |
553 | /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html |
554 | #[must_use ] |
555 | #[inline ] |
556 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
557 | pub unsafe fn gather_select_unchecked( |
558 | slice: &[T], |
559 | enable: Mask<isize, N>, |
560 | idxs: Simd<usize, N>, |
561 | or: Self, |
562 | ) -> Self { |
563 | let base_ptr = Simd::<*const T, N>::splat(slice.as_ptr()); |
564 | // Ferris forgive me, I have done pointer arithmetic here. |
565 | let ptrs = base_ptr.wrapping_add(idxs); |
566 | // Safety: The caller is responsible for determining the indices are okay to read |
567 | unsafe { Self::gather_select_ptr(ptrs, enable, or) } |
568 | } |
569 | |
570 | /// Read elementwise from pointers into a SIMD vector. |
571 | /// |
572 | /// # Safety |
573 | /// |
574 | /// Each read must satisfy the same conditions as [`core::ptr::read`]. |
575 | /// |
576 | /// # Example |
577 | /// ``` |
578 | /// # #![feature (portable_simd)] |
579 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
580 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
581 | /// # use simd::prelude::*; |
582 | /// let values = [6, 2, 4, 9]; |
583 | /// let offsets = Simd::from_array([1, 0, 0, 3]); |
584 | /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); |
585 | /// let gathered = unsafe { Simd::gather_ptr(source) }; |
586 | /// assert_eq!(gathered, Simd::from_array([2, 6, 6, 9])); |
587 | /// ``` |
588 | #[must_use ] |
589 | #[inline ] |
590 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
591 | pub unsafe fn gather_ptr(source: Simd<*const T, N>) -> Self |
592 | where |
593 | T: Default, |
594 | { |
595 | // TODO: add an intrinsic that doesn't use a passthru vector, and remove the T: Default bound |
596 | // Safety: The caller is responsible for upholding all invariants |
597 | unsafe { Self::gather_select_ptr(source, Mask::splat(true), Self::default()) } |
598 | } |
599 | |
600 | /// Conditionally read elementwise from pointers into a SIMD vector. |
601 | /// The mask `enable`s all `true` pointers and disables all `false` pointers. |
602 | /// If a pointer is disabled, the element is selected from the `or` vector, |
603 | /// and no read is performed. |
604 | /// |
605 | /// # Safety |
606 | /// |
607 | /// Enabled elements must satisfy the same conditions as [`core::ptr::read`]. |
608 | /// |
609 | /// # Example |
610 | /// ``` |
611 | /// # #![feature (portable_simd)] |
612 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
613 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
614 | /// # use simd::prelude::*; |
615 | /// let values = [6, 2, 4, 9]; |
616 | /// let enable = Mask::from_array([true, true, false, true]); |
617 | /// let offsets = Simd::from_array([1, 0, 0, 3]); |
618 | /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); |
619 | /// let gathered = unsafe { Simd::gather_select_ptr(source, enable, Simd::splat(0)) }; |
620 | /// assert_eq!(gathered, Simd::from_array([2, 6, 0, 9])); |
621 | /// ``` |
622 | #[must_use ] |
623 | #[inline ] |
624 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
625 | pub unsafe fn gather_select_ptr( |
626 | source: Simd<*const T, N>, |
627 | enable: Mask<isize, N>, |
628 | or: Self, |
629 | ) -> Self { |
630 | // Safety: The caller is responsible for upholding all invariants |
631 | unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } |
632 | } |
633 | |
634 | /// Conditionally write contiguous elements to `slice`. The `enable` mask controls |
635 | /// which elements are written, as long as they're in-bounds of the `slice`. |
636 | /// If the element is disabled or out of bounds, no memory access to that location |
637 | /// is made. |
638 | /// |
639 | /// # Examples |
640 | /// ``` |
641 | /// # #![feature (portable_simd)] |
642 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
643 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
644 | /// # use simd::{Simd, Mask}; |
645 | /// let mut arr = [0i32; 4]; |
646 | /// let write = Simd::from_array([-5, -4, -3, -2]); |
647 | /// let enable = Mask::from_array([false, true, true, true]); |
648 | /// |
649 | /// write.store_select(&mut arr[..3], enable); |
650 | /// assert_eq!(arr, [0, -4, -3, 0]); |
651 | /// ``` |
652 | #[inline ] |
653 | pub fn store_select(self, slice: &mut [T], mut enable: Mask<<T as SimdElement>::Mask, N>) { |
654 | enable &= mask_up_to(slice.len()); |
655 | // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to |
656 | // the element. |
657 | unsafe { self.store_select_ptr(slice.as_mut_ptr(), enable) } |
658 | } |
659 | |
660 | /// Conditionally write contiguous elements to `slice`. The `enable` mask controls |
661 | /// which elements are written. |
662 | /// |
663 | /// # Safety |
664 | /// |
665 | /// Every enabled element must be in bounds for the `slice`. |
666 | /// |
667 | /// # Examples |
668 | /// ``` |
669 | /// # #![feature (portable_simd)] |
670 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
671 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
672 | /// # use simd::{Simd, Mask}; |
673 | /// let mut arr = [0i32; 4]; |
674 | /// let write = Simd::from_array([-5, -4, -3, -2]); |
675 | /// let enable = Mask::from_array([false, true, true, true]); |
676 | /// |
677 | /// unsafe { write.store_select_unchecked(&mut arr, enable) }; |
678 | /// assert_eq!(arr, [0, -4, -3, -2]); |
679 | /// ``` |
680 | #[inline ] |
681 | pub unsafe fn store_select_unchecked( |
682 | self, |
683 | slice: &mut [T], |
684 | enable: Mask<<T as SimdElement>::Mask, N>, |
685 | ) { |
686 | let ptr = slice.as_mut_ptr(); |
687 | // SAFETY: The safety of writing elements in `slice` is ensured by the caller. |
688 | unsafe { self.store_select_ptr(ptr, enable) } |
689 | } |
690 | |
691 | /// Conditionally write contiguous elements starting from `ptr`. |
692 | /// The `enable` mask controls which elements are written. |
693 | /// When disabled, the memory location corresponding to that element is not accessed. |
694 | /// |
695 | /// # Safety |
696 | /// |
697 | /// Memory addresses for element are calculated [`pointer::wrapping_offset`] and |
698 | /// each enabled element must satisfy the same conditions as [`core::ptr::write`]. |
699 | #[inline ] |
700 | pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) { |
701 | // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. |
702 | unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) } |
703 | } |
704 | |
705 | /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. |
706 | /// If an index is out-of-bounds, the write is suppressed without panicking. |
707 | /// If two elements in the scattered vector would write to the same index |
708 | /// only the last element is guaranteed to actually be written. |
709 | /// |
710 | /// # Examples |
711 | /// ``` |
712 | /// # #![feature (portable_simd)] |
713 | /// # use core::simd::Simd; |
714 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
715 | /// let idxs = Simd::from_array([9, 3, 0, 0]); // Note the duplicate index. |
716 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
717 | /// |
718 | /// vals.scatter(&mut vec, idxs); // two logical writes means the last wins. |
719 | /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]); |
720 | /// ``` |
721 | #[inline ] |
722 | pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, N>) { |
723 | self.scatter_select(slice, Mask::splat(true), idxs) |
724 | } |
725 | |
726 | /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. |
727 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
728 | /// If an enabled index is out-of-bounds, the write is suppressed without panicking. |
729 | /// If two enabled elements in the scattered vector would write to the same index, |
730 | /// only the last element is guaranteed to actually be written. |
731 | /// |
732 | /// # Examples |
733 | /// ``` |
734 | /// # #![feature (portable_simd)] |
735 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
736 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
737 | /// # use simd::{Simd, Mask}; |
738 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
739 | /// let idxs = Simd::from_array([9, 3, 0, 0]); // Includes an out-of-bounds index |
740 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
741 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
742 | /// |
743 | /// vals.scatter_select(&mut vec, enable, idxs); // The last write is masked, thus omitted. |
744 | /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); |
745 | /// ``` |
746 | #[inline ] |
747 | pub fn scatter_select(self, slice: &mut [T], enable: Mask<isize, N>, idxs: Simd<usize, N>) { |
748 | let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len())); |
749 | // Safety: We have masked-off out-of-bounds indices. |
750 | unsafe { self.scatter_select_unchecked(slice, enable, idxs) } |
751 | } |
752 | |
753 | /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. |
754 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
755 | /// If two enabled elements in the scattered vector would write to the same index, |
756 | /// only the last element is guaranteed to actually be written. |
757 | /// |
758 | /// # Safety |
759 | /// |
760 | /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*, |
761 | /// and may lead to memory corruption. |
762 | /// |
763 | /// # Examples |
764 | /// ``` |
765 | /// # #![feature (portable_simd)] |
766 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
767 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
768 | /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; |
769 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
770 | /// let idxs = Simd::from_array([9, 3, 0, 0]); |
771 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
772 | /// let enable = Mask::from_array([true, true, true, false]); // Masks the final index |
773 | /// // If this mask was used to scatter, it would be unsound. Let's fix that. |
774 | /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); |
775 | /// |
776 | /// // We have masked the OOB index, so it's safe to scatter now. |
777 | /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); } |
778 | /// // The second write to index 0 was masked, thus omitted. |
779 | /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); |
780 | /// ``` |
781 | /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html |
782 | #[inline ] |
783 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
784 | pub unsafe fn scatter_select_unchecked( |
785 | self, |
786 | slice: &mut [T], |
787 | enable: Mask<isize, N>, |
788 | idxs: Simd<usize, N>, |
789 | ) { |
790 | // Safety: This block works with *mut T derived from &mut 'a [T], |
791 | // which means it is delicate in Rust's borrowing model, circa 2021: |
792 | // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts! |
793 | // Even though this block is largely safe methods, it must be exactly this way |
794 | // to prevent invalidating the raw ptrs while they're live. |
795 | // Thus, entering this block requires all values to use being already ready: |
796 | // 0. idxs we want to write to, which are used to construct the mask. |
797 | // 1. enable, which depends on an initial &'a [T] and the idxs. |
798 | // 2. actual values to scatter (self). |
799 | // 3. &mut [T] which will become our base ptr. |
800 | unsafe { |
801 | // Now Entering ☢️ *mut T Zone |
802 | let base_ptr = Simd::<*mut T, N>::splat(slice.as_mut_ptr()); |
803 | // Ferris forgive me, I have done pointer arithmetic here. |
804 | let ptrs = base_ptr.wrapping_add(idxs); |
805 | // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah |
806 | self.scatter_select_ptr(ptrs, enable); |
807 | // Cleared ☢️ *mut T Zone |
808 | } |
809 | } |
810 | |
811 | /// Write pointers elementwise into a SIMD vector. |
812 | /// |
813 | /// # Safety |
814 | /// |
815 | /// Each write must satisfy the same conditions as [`core::ptr::write`]. |
816 | /// |
817 | /// # Example |
818 | /// ``` |
819 | /// # #![feature (portable_simd)] |
820 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
821 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
822 | /// # use simd::{Simd, ptr::SimdMutPtr}; |
823 | /// let mut values = [0; 4]; |
824 | /// let offset = Simd::from_array([3, 2, 1, 0]); |
825 | /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); |
826 | /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_ptr(ptrs); } |
827 | /// assert_eq!(values, [7, 5, 3, 6]); |
828 | /// ``` |
829 | #[inline ] |
830 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
831 | pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, N>) { |
832 | // Safety: The caller is responsible for upholding all invariants |
833 | unsafe { self.scatter_select_ptr(dest, Mask::splat(true)) } |
834 | } |
835 | |
836 | /// Conditionally write pointers elementwise into a SIMD vector. |
837 | /// The mask `enable`s all `true` pointers and disables all `false` pointers. |
838 | /// If a pointer is disabled, the write to its pointee is skipped. |
839 | /// |
840 | /// # Safety |
841 | /// |
842 | /// Enabled pointers must satisfy the same conditions as [`core::ptr::write`]. |
843 | /// |
844 | /// # Example |
845 | /// ``` |
846 | /// # #![feature (portable_simd)] |
847 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
848 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
849 | /// # use simd::{Mask, Simd, ptr::SimdMutPtr}; |
850 | /// let mut values = [0; 4]; |
851 | /// let offset = Simd::from_array([3, 2, 1, 0]); |
852 | /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); |
853 | /// let enable = Mask::from_array([true, true, false, false]); |
854 | /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_select_ptr(ptrs, enable); } |
855 | /// assert_eq!(values, [0, 0, 3, 6]); |
856 | /// ``` |
857 | #[inline ] |
858 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
859 | pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask<isize, N>) { |
860 | // Safety: The caller is responsible for upholding all invariants |
861 | unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) } |
862 | } |
863 | } |
864 | |
865 | impl<T, const N: usize> Copy for Simd<T, N> |
866 | where |
867 | LaneCount<N>: SupportedLaneCount, |
868 | T: SimdElement, |
869 | { |
870 | } |
871 | |
872 | impl<T, const N: usize> Clone for Simd<T, N> |
873 | where |
874 | LaneCount<N>: SupportedLaneCount, |
875 | T: SimdElement, |
876 | { |
877 | #[inline ] |
878 | fn clone(&self) -> Self { |
879 | *self |
880 | } |
881 | } |
882 | |
883 | impl<T, const N: usize> Default for Simd<T, N> |
884 | where |
885 | LaneCount<N>: SupportedLaneCount, |
886 | T: SimdElement + Default, |
887 | { |
888 | #[inline ] |
889 | fn default() -> Self { |
890 | Self::splat(T::default()) |
891 | } |
892 | } |
893 | |
894 | impl<T, const N: usize> PartialEq for Simd<T, N> |
895 | where |
896 | LaneCount<N>: SupportedLaneCount, |
897 | T: SimdElement + PartialEq, |
898 | { |
899 | #[inline ] |
900 | fn eq(&self, other: &Self) -> bool { |
901 | // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. |
902 | let mask = unsafe { |
903 | let tfvec: Simd<<T as SimdElement>::Mask, N> = |
904 | core::intrinsics::simd::simd_eq(*self, *other); |
905 | Mask::from_int_unchecked(tfvec) |
906 | }; |
907 | |
908 | // Two vectors are equal if all elements are equal when compared elementwise |
909 | mask.all() |
910 | } |
911 | |
912 | #[allow (clippy::partialeq_ne_impl)] |
913 | #[inline ] |
914 | fn ne(&self, other: &Self) -> bool { |
915 | // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. |
916 | let mask = unsafe { |
917 | let tfvec: Simd<<T as SimdElement>::Mask, N> = |
918 | core::intrinsics::simd::simd_ne(*self, *other); |
919 | Mask::from_int_unchecked(tfvec) |
920 | }; |
921 | |
922 | // Two vectors are non-equal if any elements are non-equal when compared elementwise |
923 | mask.any() |
924 | } |
925 | } |
926 | |
927 | impl<T, const N: usize> PartialOrd for Simd<T, N> |
928 | where |
929 | LaneCount<N>: SupportedLaneCount, |
930 | T: SimdElement + PartialOrd, |
931 | { |
932 | #[inline ] |
933 | fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { |
934 | // TODO use SIMD equality |
935 | self.to_array().partial_cmp(other.as_ref()) |
936 | } |
937 | } |
938 | |
939 | impl<T, const N: usize> Eq for Simd<T, N> |
940 | where |
941 | LaneCount<N>: SupportedLaneCount, |
942 | T: SimdElement + Eq, |
943 | { |
944 | } |
945 | |
946 | impl<T, const N: usize> Ord for Simd<T, N> |
947 | where |
948 | LaneCount<N>: SupportedLaneCount, |
949 | T: SimdElement + Ord, |
950 | { |
951 | #[inline ] |
952 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { |
953 | // TODO use SIMD equality |
954 | self.to_array().cmp(other.as_ref()) |
955 | } |
956 | } |
957 | |
958 | impl<T, const N: usize> core::hash::Hash for Simd<T, N> |
959 | where |
960 | LaneCount<N>: SupportedLaneCount, |
961 | T: SimdElement + core::hash::Hash, |
962 | { |
963 | #[inline ] |
964 | fn hash<H>(&self, state: &mut H) |
965 | where |
966 | H: core::hash::Hasher, |
967 | { |
968 | self.as_array().hash(state) |
969 | } |
970 | } |
971 | |
972 | // array references |
973 | impl<T, const N: usize> AsRef<[T; N]> for Simd<T, N> |
974 | where |
975 | LaneCount<N>: SupportedLaneCount, |
976 | T: SimdElement, |
977 | { |
978 | #[inline ] |
979 | fn as_ref(&self) -> &[T; N] { |
980 | self.as_array() |
981 | } |
982 | } |
983 | |
984 | impl<T, const N: usize> AsMut<[T; N]> for Simd<T, N> |
985 | where |
986 | LaneCount<N>: SupportedLaneCount, |
987 | T: SimdElement, |
988 | { |
989 | #[inline ] |
990 | fn as_mut(&mut self) -> &mut [T; N] { |
991 | self.as_mut_array() |
992 | } |
993 | } |
994 | |
995 | // slice references |
996 | impl<T, const N: usize> AsRef<[T]> for Simd<T, N> |
997 | where |
998 | LaneCount<N>: SupportedLaneCount, |
999 | T: SimdElement, |
1000 | { |
1001 | #[inline ] |
1002 | fn as_ref(&self) -> &[T] { |
1003 | self.as_array() |
1004 | } |
1005 | } |
1006 | |
1007 | impl<T, const N: usize> AsMut<[T]> for Simd<T, N> |
1008 | where |
1009 | LaneCount<N>: SupportedLaneCount, |
1010 | T: SimdElement, |
1011 | { |
1012 | #[inline ] |
1013 | fn as_mut(&mut self) -> &mut [T] { |
1014 | self.as_mut_array() |
1015 | } |
1016 | } |
1017 | |
1018 | // vector/array conversion |
1019 | impl<T, const N: usize> From<[T; N]> for Simd<T, N> |
1020 | where |
1021 | LaneCount<N>: SupportedLaneCount, |
1022 | T: SimdElement, |
1023 | { |
1024 | #[inline ] |
1025 | fn from(array: [T; N]) -> Self { |
1026 | Self::from_array(array) |
1027 | } |
1028 | } |
1029 | |
1030 | impl<T, const N: usize> From<Simd<T, N>> for [T; N] |
1031 | where |
1032 | LaneCount<N>: SupportedLaneCount, |
1033 | T: SimdElement, |
1034 | { |
1035 | #[inline ] |
1036 | fn from(vector: Simd<T, N>) -> Self { |
1037 | vector.to_array() |
1038 | } |
1039 | } |
1040 | |
1041 | impl<T, const N: usize> TryFrom<&[T]> for Simd<T, N> |
1042 | where |
1043 | LaneCount<N>: SupportedLaneCount, |
1044 | T: SimdElement, |
1045 | { |
1046 | type Error = core::array::TryFromSliceError; |
1047 | |
1048 | #[inline ] |
1049 | fn try_from(slice: &[T]) -> Result<Self, core::array::TryFromSliceError> { |
1050 | Ok(Self::from_array(slice.try_into()?)) |
1051 | } |
1052 | } |
1053 | |
1054 | impl<T, const N: usize> TryFrom<&mut [T]> for Simd<T, N> |
1055 | where |
1056 | LaneCount<N>: SupportedLaneCount, |
1057 | T: SimdElement, |
1058 | { |
1059 | type Error = core::array::TryFromSliceError; |
1060 | |
1061 | #[inline ] |
1062 | fn try_from(slice: &mut [T]) -> Result<Self, core::array::TryFromSliceError> { |
1063 | Ok(Self::from_array(slice.try_into()?)) |
1064 | } |
1065 | } |
1066 | |
// `Sealed` lives in a private module and is a supertrait of `SimdElement`.
mod sealed {
    pub trait Sealed {}
}
use self::sealed::Sealed;
1071 | |
1072 | /// Marker trait for types that may be used as SIMD vector elements. |
1073 | /// |
1074 | /// # Safety |
1075 | /// This trait, when implemented, asserts the compiler can monomorphize |
1076 | /// `#[repr(simd)]` structs with the marked type as an element. |
1077 | /// Strictly, it is valid to impl if the vector will not be miscompiled. |
1078 | /// Practically, it is user-unfriendly to impl it if the vector won't compile, |
1079 | /// even when no soundness guarantees are broken by allowing the user to try. |
1080 | pub unsafe trait SimdElement: Sealed + Copy { |
1081 | /// The mask element type corresponding to this element type. |
1082 | type Mask: MaskElement; |
1083 | } |
1084 | |
1085 | impl Sealed for u8 {} |
1086 | |
1087 | // Safety: u8 is a valid SIMD element type, and is supported by this API |
1088 | unsafe impl SimdElement for u8 { |
1089 | type Mask = i8; |
1090 | } |
1091 | |
1092 | impl Sealed for u16 {} |
1093 | |
1094 | // Safety: u16 is a valid SIMD element type, and is supported by this API |
1095 | unsafe impl SimdElement for u16 { |
1096 | type Mask = i16; |
1097 | } |
1098 | |
1099 | impl Sealed for u32 {} |
1100 | |
1101 | // Safety: u32 is a valid SIMD element type, and is supported by this API |
1102 | unsafe impl SimdElement for u32 { |
1103 | type Mask = i32; |
1104 | } |
1105 | |
1106 | impl Sealed for u64 {} |
1107 | |
1108 | // Safety: u64 is a valid SIMD element type, and is supported by this API |
1109 | unsafe impl SimdElement for u64 { |
1110 | type Mask = i64; |
1111 | } |
1112 | |
1113 | impl Sealed for usize {} |
1114 | |
1115 | // Safety: usize is a valid SIMD element type, and is supported by this API |
1116 | unsafe impl SimdElement for usize { |
1117 | type Mask = isize; |
1118 | } |
1119 | |
1120 | impl Sealed for i8 {} |
1121 | |
1122 | // Safety: i8 is a valid SIMD element type, and is supported by this API |
1123 | unsafe impl SimdElement for i8 { |
1124 | type Mask = i8; |
1125 | } |
1126 | |
1127 | impl Sealed for i16 {} |
1128 | |
1129 | // Safety: i16 is a valid SIMD element type, and is supported by this API |
1130 | unsafe impl SimdElement for i16 { |
1131 | type Mask = i16; |
1132 | } |
1133 | |
1134 | impl Sealed for i32 {} |
1135 | |
1136 | // Safety: i32 is a valid SIMD element type, and is supported by this API |
1137 | unsafe impl SimdElement for i32 { |
1138 | type Mask = i32; |
1139 | } |
1140 | |
1141 | impl Sealed for i64 {} |
1142 | |
1143 | // Safety: i64 is a valid SIMD element type, and is supported by this API |
1144 | unsafe impl SimdElement for i64 { |
1145 | type Mask = i64; |
1146 | } |
1147 | |
1148 | impl Sealed for isize {} |
1149 | |
1150 | // Safety: isize is a valid SIMD element type, and is supported by this API |
1151 | unsafe impl SimdElement for isize { |
1152 | type Mask = isize; |
1153 | } |
1154 | |
1155 | impl Sealed for f32 {} |
1156 | |
1157 | // Safety: f32 is a valid SIMD element type, and is supported by this API |
1158 | unsafe impl SimdElement for f32 { |
1159 | type Mask = i32; |
1160 | } |
1161 | |
1162 | impl Sealed for f64 {} |
1163 | |
1164 | // Safety: f64 is a valid SIMD element type, and is supported by this API |
1165 | unsafe impl SimdElement for f64 { |
1166 | type Mask = i64; |
1167 | } |
1168 | |
1169 | impl<T> Sealed for *const T {} |
1170 | |
1171 | // Safety: (thin) const pointers are valid SIMD element types, and are supported by this API |
1172 | // |
1173 | // Fat pointers may be supported in the future. |
1174 | unsafe impl<T> SimdElement for *const T |
1175 | where |
1176 | T: core::ptr::Pointee<Metadata = ()>, |
1177 | { |
1178 | type Mask = isize; |
1179 | } |
1180 | |
1181 | impl<T> Sealed for *mut T {} |
1182 | |
1183 | // Safety: (thin) mut pointers are valid SIMD element types, and are supported by this API |
1184 | // |
1185 | // Fat pointers may be supported in the future. |
1186 | unsafe impl<T> SimdElement for *mut T |
1187 | where |
1188 | T: core::ptr::Pointee<Metadata = ()>, |
1189 | { |
1190 | type Mask = isize; |
1191 | } |
1192 | |
1193 | #[inline ] |
1194 | fn lane_indices<const N: usize>() -> Simd<usize, N> |
1195 | where |
1196 | LaneCount<N>: SupportedLaneCount, |
1197 | { |
1198 | let mut index: [usize; N] = [0; N]; |
1199 | for i: usize in 0..N { |
1200 | index[i] = i; |
1201 | } |
1202 | Simd::from_array(index) |
1203 | } |
1204 | |
1205 | #[inline ] |
1206 | fn mask_up_to<M, const N: usize>(len: usize) -> Mask<M, N> |
1207 | where |
1208 | LaneCount<N>: SupportedLaneCount, |
1209 | M: MaskElement, |
1210 | { |
1211 | let index: Simd = lane_indices::<N>(); |
1212 | let max_value: u64 = M::max_unsigned(); |
1213 | macro_rules! case { |
1214 | ($ty:ty) => { |
1215 | if N < <$ty>::MAX as usize && max_value as $ty as u64 == max_value { |
1216 | return index.cast().simd_lt(Simd::splat(len.min(N) as $ty)).cast(); |
1217 | } |
1218 | }; |
1219 | } |
1220 | case!(u8); |
1221 | case!(u16); |
1222 | case!(u32); |
1223 | case!(u64); |
1224 | index.simd_lt(Simd::splat(len)).cast() |
1225 | } |
1226 | |