1 | use crate::simd::{ |
2 | cmp::SimdPartialOrd, |
3 | intrinsics, |
4 | ptr::{SimdConstPtr, SimdMutPtr}, |
5 | LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, |
6 | }; |
7 | use core::convert::{TryFrom, TryInto}; |
8 | |
9 | /// A SIMD vector with the shape of `[T; N]` but the operations of `T`. |
10 | /// |
11 | /// `Simd<T, N>` supports the operators (+, *, etc.) that `T` does in "elementwise" fashion. |
12 | /// These take the element at each index from the left-hand side and right-hand side, |
13 | /// perform the operation, then return the result in the same index in a vector of equal size. |
14 | /// However, `Simd` differs from normal iteration and normal arrays: |
15 | /// - `Simd<T, N>` executes `N` operations in a single step with no `break`s |
16 | /// - `Simd<T, N>` can have an alignment greater than `T`, for better mechanical sympathy |
17 | /// |
18 | /// By always imposing these constraints on `Simd`, it is easier to compile elementwise operations |
19 | /// into machine instructions that can themselves be executed in parallel. |
20 | /// |
21 | /// ```rust |
22 | /// # #![feature (portable_simd)] |
23 | /// # use core::simd::{Simd}; |
24 | /// # use core::array; |
25 | /// let a: [i32; 4] = [-2, 0, 2, 4]; |
26 | /// let b = [10, 9, 8, 7]; |
27 | /// let sum = array::from_fn(|i| a[i] + b[i]); |
28 | /// let prod = array::from_fn(|i| a[i] * b[i]); |
29 | /// |
30 | /// // `Simd<T, N>` implements `From<[T; N]>` |
31 | /// let (v, w) = (Simd::from(a), Simd::from(b)); |
32 | /// // Which means arrays implement `Into<Simd<T, N>>`. |
33 | /// assert_eq!(v + w, sum.into()); |
34 | /// assert_eq!(v * w, prod.into()); |
35 | /// ``` |
36 | /// |
37 | /// |
38 | /// `Simd` with integer elements treats operators as wrapping, as if `T` was [`Wrapping<T>`]. |
39 | /// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior. |
40 | /// This means there is no warning on overflows, even in "debug" builds. |
41 | /// For most applications where `Simd` is appropriate, it is "not a bug" to wrap, |
42 | /// and even "debug builds" are unlikely to tolerate the loss of performance. |
43 | /// You may want to consider using explicitly checked arithmetic if such is required. |
44 | /// Division by zero on integers still causes a panic, so |
45 | /// you may want to consider using `f32` or `f64` if that is unacceptable. |
46 | /// |
47 | /// [`Wrapping<T>`]: core::num::Wrapping |
48 | /// |
49 | /// # Layout |
50 | /// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), with a greater alignment. |
51 | /// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`. |
52 | /// Thus it is sound to [`transmute`] `Simd<T, N>` to `[T; N]` and should optimize to "zero cost", |
53 | /// but the reverse transmutation may require a copy the compiler cannot simply elide. |
54 | /// |
55 | /// # ABI "Features" |
56 | /// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed and returned via memory, |
57 | /// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept |
58 | /// `Simd<T, N>` or return it is recommended, at the cost of code generation time, as |
59 | /// inlining SIMD-using functions can omit a large function prolog or epilog and thus |
60 | /// improve both speed and code size. The need for this may be corrected in the future. |
61 | /// |
62 | /// Using `#[inline(always)]` still requires additional care. |
63 | /// |
64 | /// # Safe SIMD with Unsafe Rust |
65 | /// |
66 | /// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code. |
67 | /// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from. |
68 | /// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations, |
69 | /// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`. |
70 | /// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with |
71 | /// [`read_unaligned`] and [`write_unaligned`]. This is because: |
72 | /// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment) |
73 | /// - `Simd<T, N>` is often read from or written to [`[T]`](slice) and other types aligned to `T` |
74 | /// - combining these actions violates the `unsafe` contract and explodes the program into |
75 | /// a puff of **undefined behavior** |
76 | /// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned |
77 | /// if it sees the optimization |
78 | /// - most contemporary processors with "aligned" and "unaligned" read and write instructions |
79 | /// exhibit no performance difference if the "unaligned" variant is aligned at runtime |
80 | /// |
/// Fewer obligations mean unaligned reads and writes are less likely to make the program unsound,
/// and they may be just as fast as the stricter alternatives.
83 | /// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for |
84 | /// converting `[T]` to `[Simd<T, N>]`, and allows soundly operating on an aligned SIMD body, |
85 | /// but it may cost more time when handling the scalar head and tail. |
86 | /// If these are not enough, it is most ideal to design data structures to be already aligned |
87 | /// to `mem::align_of::<Simd<T, N>>()` before using `unsafe` Rust to read or write. |
88 | /// Other ways to compensate for these facts, like materializing `Simd` to or from an array first, |
89 | /// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`]. |
90 | /// |
91 | /// [`transmute`]: core::mem::transmute |
92 | /// [raw pointers]: pointer |
93 | /// [`read_unaligned`]: pointer::read_unaligned |
94 | /// [`write_unaligned`]: pointer::write_unaligned |
95 | /// [`read`]: pointer::read |
96 | /// [`write`]: pointer::write |
97 | /// [as_simd]: slice::as_simd |
98 | // |
99 | // NOTE: Accessing the inner array directly in any way (e.g. by using the `.0` field syntax) or |
100 | // directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be |
101 | // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also |
102 | // causes rustc to emit illegal LLVM IR in some cases. |
103 | #[repr (simd)] |
104 | pub struct Simd<T, const N: usize>([T; N]) |
105 | where |
106 | LaneCount<N>: SupportedLaneCount, |
107 | T: SimdElement; |
108 | |
109 | impl<T, const N: usize> Simd<T, N> |
110 | where |
111 | LaneCount<N>: SupportedLaneCount, |
112 | T: SimdElement, |
113 | { |
114 | /// Number of elements in this vector. |
115 | pub const LEN: usize = N; |
116 | |
117 | /// Returns the number of elements in this SIMD vector. |
118 | /// |
119 | /// # Examples |
120 | /// |
121 | /// ``` |
122 | /// # #![feature (portable_simd)] |
123 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
124 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
125 | /// # use simd::u32x4; |
126 | /// let v = u32x4::splat(0); |
127 | /// assert_eq!(v.len(), 4); |
128 | /// ``` |
129 | #[inline ] |
130 | #[allow (clippy::len_without_is_empty)] |
131 | pub const fn len(&self) -> usize { |
132 | Self::LEN |
133 | } |
134 | |
135 | /// Constructs a new SIMD vector with all elements set to the given value. |
136 | /// |
137 | /// # Examples |
138 | /// |
139 | /// ``` |
140 | /// # #![feature (portable_simd)] |
141 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
142 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
143 | /// # use simd::u32x4; |
144 | /// let v = u32x4::splat(8); |
145 | /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); |
146 | /// ``` |
147 | #[inline ] |
148 | pub fn splat(value: T) -> Self { |
149 | // This is preferred over `[value; N]`, since it's explicitly a splat: |
150 | // https://github.com/rust-lang/rust/issues/97804 |
151 | struct Splat; |
152 | impl<const N: usize> Swizzle<N> for Splat { |
153 | const INDEX: [usize; N] = [0; N]; |
154 | } |
155 | Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value])) |
156 | } |
157 | |
158 | /// Returns an array reference containing the entire SIMD vector. |
159 | /// |
160 | /// # Examples |
161 | /// |
162 | /// ``` |
163 | /// # #![feature (portable_simd)] |
164 | /// # use core::simd::{Simd, u64x4}; |
165 | /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]); |
166 | /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); |
167 | /// ``` |
168 | #[inline ] |
169 | pub const fn as_array(&self) -> &[T; N] { |
170 | // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with |
171 | // potential padding at the end, so pointer casting to a |
172 | // `&[T; N]` is safe. |
173 | // |
174 | // NOTE: This deliberately doesn't just use `&self.0`, see the comment |
175 | // on the struct definition for details. |
176 | unsafe { &*(self as *const Self as *const [T; N]) } |
177 | } |
178 | |
179 | /// Returns a mutable array reference containing the entire SIMD vector. |
180 | #[inline ] |
181 | pub fn as_mut_array(&mut self) -> &mut [T; N] { |
182 | // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with |
183 | // potential padding at the end, so pointer casting to a |
184 | // `&mut [T; N]` is safe. |
185 | // |
186 | // NOTE: This deliberately doesn't just use `&mut self.0`, see the comment |
187 | // on the struct definition for details. |
188 | unsafe { &mut *(self as *mut Self as *mut [T; N]) } |
189 | } |
190 | |
191 | /// Load a vector from an array of `T`. |
192 | /// |
193 | /// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing). |
194 | /// With padding, `read_unaligned` will read past the end of an array of N elements. |
195 | /// |
196 | /// # Safety |
197 | /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. |
198 | #[inline ] |
199 | const unsafe fn load(ptr: *const [T; N]) -> Self { |
200 | // There are potentially simpler ways to write this function, but this should result in |
201 | // LLVM `load <N x T>` |
202 | |
203 | let mut tmp = core::mem::MaybeUninit::<Self>::uninit(); |
204 | // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. It may have padding |
205 | // which does not need to be initialized. The safety of reading `ptr` is ensured by the |
206 | // caller. |
207 | unsafe { |
208 | core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr().cast(), 1); |
209 | tmp.assume_init() |
210 | } |
211 | } |
212 | |
213 | /// Store a vector to an array of `T`. |
214 | /// |
215 | /// See `load` as to why this function is necessary. |
216 | /// |
217 | /// # Safety |
218 | /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. |
219 | #[inline ] |
220 | const unsafe fn store(self, ptr: *mut [T; N]) { |
221 | // There are potentially simpler ways to write this function, but this should result in |
222 | // LLVM `store <N x T>` |
223 | |
224 | // Creating a temporary helps LLVM turn the memcpy into a store. |
225 | let tmp = self; |
226 | // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. The safety of writing |
227 | // `ptr` is ensured by the caller. |
228 | unsafe { core::ptr::copy_nonoverlapping(tmp.as_array(), ptr, 1) } |
229 | } |
230 | |
231 | /// Converts an array to a SIMD vector. |
232 | #[inline ] |
233 | pub const fn from_array(array: [T; N]) -> Self { |
234 | // SAFETY: `&array` is safe to read. |
235 | // |
236 | // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)` |
237 | // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). |
238 | // |
239 | // NOTE: This deliberately doesn't just use `Self(array)`, see the comment |
240 | // on the struct definition for details. |
241 | unsafe { Self::load(&array) } |
242 | } |
243 | |
244 | /// Converts a SIMD vector to an array. |
245 | #[inline ] |
246 | pub const fn to_array(self) -> [T; N] { |
247 | let mut tmp = core::mem::MaybeUninit::uninit(); |
248 | // SAFETY: writing to `tmp` is safe and initializes it. |
249 | // |
250 | // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)` |
251 | // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). |
252 | // |
253 | // NOTE: This deliberately doesn't just use `self.0`, see the comment |
254 | // on the struct definition for details. |
255 | unsafe { |
256 | self.store(tmp.as_mut_ptr()); |
257 | tmp.assume_init() |
258 | } |
259 | } |
260 | |
261 | /// Converts a slice to a SIMD vector containing `slice[..N]`. |
262 | /// |
263 | /// # Panics |
264 | /// |
265 | /// Panics if the slice's length is less than the vector's `Simd::N`. |
266 | /// |
267 | /// # Example |
268 | /// |
269 | /// ``` |
270 | /// # #![feature (portable_simd)] |
271 | /// # use core::simd::u32x4; |
272 | /// let source = vec![1, 2, 3, 4, 5, 6]; |
273 | /// let v = u32x4::from_slice(&source); |
274 | /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); |
275 | /// ``` |
276 | #[must_use ] |
277 | #[inline ] |
278 | #[track_caller ] |
279 | pub const fn from_slice(slice: &[T]) -> Self { |
280 | assert!( |
281 | slice.len() >= Self::LEN, |
282 | "slice length must be at least the number of elements" |
283 | ); |
284 | // SAFETY: We just checked that the slice contains |
285 | // at least `N` elements. |
286 | unsafe { Self::load(slice.as_ptr().cast()) } |
287 | } |
288 | |
289 | /// Writes a SIMD vector to the first `N` elements of a slice. |
290 | /// |
291 | /// # Panics |
292 | /// |
293 | /// Panics if the slice's length is less than the vector's `Simd::N`. |
294 | /// |
295 | /// # Example |
296 | /// |
297 | /// ``` |
298 | /// # #![feature (portable_simd)] |
299 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
300 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
301 | /// # use simd::u32x4; |
302 | /// let mut dest = vec![0; 6]; |
303 | /// let v = u32x4::from_array([1, 2, 3, 4]); |
304 | /// v.copy_to_slice(&mut dest); |
305 | /// assert_eq!(&dest, &[1, 2, 3, 4, 0, 0]); |
306 | /// ``` |
307 | #[inline ] |
308 | #[track_caller ] |
309 | pub fn copy_to_slice(self, slice: &mut [T]) { |
310 | assert!( |
311 | slice.len() >= Self::LEN, |
312 | "slice length must be at least the number of elements" |
313 | ); |
314 | // SAFETY: We just checked that the slice contains |
315 | // at least `N` elements. |
316 | unsafe { self.store(slice.as_mut_ptr().cast()) } |
317 | } |
318 | |
319 | /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. |
320 | /// If an index is out-of-bounds, the element is instead selected from the `or` vector. |
321 | /// |
322 | /// # Examples |
323 | /// ``` |
324 | /// # #![feature (portable_simd)] |
325 | /// # use core::simd::Simd; |
326 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
327 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds |
328 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
329 | /// |
330 | /// let result = Simd::gather_or(&vec, idxs, alt); |
331 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15])); |
332 | /// ``` |
333 | #[must_use ] |
334 | #[inline ] |
335 | pub fn gather_or(slice: &[T], idxs: Simd<usize, N>, or: Self) -> Self { |
336 | Self::gather_select(slice, Mask::splat(true), idxs, or) |
337 | } |
338 | |
339 | /// Reads from indices in `slice` to construct a SIMD vector. |
340 | /// If an index is out-of-bounds, the element is set to the default given by `T: Default`. |
341 | /// |
342 | /// # Examples |
343 | /// ``` |
344 | /// # #![feature (portable_simd)] |
345 | /// # use core::simd::Simd; |
346 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
347 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds |
348 | /// |
349 | /// let result = Simd::gather_or_default(&vec, idxs); |
350 | /// assert_eq!(result, Simd::from_array([0, 13, 10, 15])); |
351 | /// ``` |
352 | #[must_use ] |
353 | #[inline ] |
354 | pub fn gather_or_default(slice: &[T], idxs: Simd<usize, N>) -> Self |
355 | where |
356 | T: Default, |
357 | { |
358 | Self::gather_or(slice, idxs, Self::splat(T::default())) |
359 | } |
360 | |
361 | /// Reads from indices in `slice` to construct a SIMD vector. |
362 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
363 | /// If an index is disabled or is out-of-bounds, the element is selected from the `or` vector. |
364 | /// |
365 | /// # Examples |
366 | /// ``` |
367 | /// # #![feature (portable_simd)] |
368 | /// # use core::simd::{Simd, Mask}; |
369 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
370 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index |
371 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
372 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
373 | /// |
374 | /// let result = Simd::gather_select(&vec, enable, idxs, alt); |
375 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); |
376 | /// ``` |
377 | #[must_use ] |
378 | #[inline ] |
379 | pub fn gather_select( |
380 | slice: &[T], |
381 | enable: Mask<isize, N>, |
382 | idxs: Simd<usize, N>, |
383 | or: Self, |
384 | ) -> Self { |
385 | let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len())); |
386 | // Safety: We have masked-off out-of-bounds indices. |
387 | unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } |
388 | } |
389 | |
390 | /// Reads from indices in `slice` to construct a SIMD vector. |
391 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
392 | /// If an index is disabled, the element is selected from the `or` vector. |
393 | /// |
394 | /// # Safety |
395 | /// |
396 | /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]* |
397 | /// even if the resulting value is not used. |
398 | /// |
399 | /// # Examples |
400 | /// ``` |
401 | /// # #![feature (portable_simd)] |
402 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
403 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
404 | /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; |
405 | /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
406 | /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index |
407 | /// let alt = Simd::from_array([-5, -4, -3, -2]); |
408 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
409 | /// // If this mask was used to gather, it would be unsound. Let's fix that. |
410 | /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); |
411 | /// |
412 | /// // The out-of-bounds index has been masked, so it's safe to gather now. |
413 | /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) }; |
414 | /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); |
415 | /// ``` |
416 | /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html |
417 | #[must_use ] |
418 | #[inline ] |
419 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
420 | pub unsafe fn gather_select_unchecked( |
421 | slice: &[T], |
422 | enable: Mask<isize, N>, |
423 | idxs: Simd<usize, N>, |
424 | or: Self, |
425 | ) -> Self { |
426 | let base_ptr = Simd::<*const T, N>::splat(slice.as_ptr()); |
427 | // Ferris forgive me, I have done pointer arithmetic here. |
428 | let ptrs = base_ptr.wrapping_add(idxs); |
429 | // Safety: The caller is responsible for determining the indices are okay to read |
430 | unsafe { Self::gather_select_ptr(ptrs, enable, or) } |
431 | } |
432 | |
433 | /// Read elementwise from pointers into a SIMD vector. |
434 | /// |
435 | /// # Safety |
436 | /// |
437 | /// Each read must satisfy the same conditions as [`core::ptr::read`]. |
438 | /// |
439 | /// # Example |
440 | /// ``` |
441 | /// # #![feature (portable_simd)] |
442 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
443 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
444 | /// # use simd::prelude::*; |
445 | /// let values = [6, 2, 4, 9]; |
446 | /// let offsets = Simd::from_array([1, 0, 0, 3]); |
447 | /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); |
448 | /// let gathered = unsafe { Simd::gather_ptr(source) }; |
449 | /// assert_eq!(gathered, Simd::from_array([2, 6, 6, 9])); |
450 | /// ``` |
451 | #[must_use ] |
452 | #[inline ] |
453 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
454 | pub unsafe fn gather_ptr(source: Simd<*const T, N>) -> Self |
455 | where |
456 | T: Default, |
457 | { |
458 | // TODO: add an intrinsic that doesn't use a passthru vector, and remove the T: Default bound |
459 | // Safety: The caller is responsible for upholding all invariants |
460 | unsafe { Self::gather_select_ptr(source, Mask::splat(true), Self::default()) } |
461 | } |
462 | |
463 | /// Conditionally read elementwise from pointers into a SIMD vector. |
464 | /// The mask `enable`s all `true` pointers and disables all `false` pointers. |
465 | /// If a pointer is disabled, the element is selected from the `or` vector, |
466 | /// and no read is performed. |
467 | /// |
468 | /// # Safety |
469 | /// |
470 | /// Enabled elements must satisfy the same conditions as [`core::ptr::read`]. |
471 | /// |
472 | /// # Example |
473 | /// ``` |
474 | /// # #![feature (portable_simd)] |
475 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
476 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
477 | /// # use simd::prelude::*; |
478 | /// let values = [6, 2, 4, 9]; |
479 | /// let enable = Mask::from_array([true, true, false, true]); |
480 | /// let offsets = Simd::from_array([1, 0, 0, 3]); |
481 | /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); |
482 | /// let gathered = unsafe { Simd::gather_select_ptr(source, enable, Simd::splat(0)) }; |
483 | /// assert_eq!(gathered, Simd::from_array([2, 6, 0, 9])); |
484 | /// ``` |
485 | #[must_use ] |
486 | #[inline ] |
487 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
488 | pub unsafe fn gather_select_ptr( |
489 | source: Simd<*const T, N>, |
490 | enable: Mask<isize, N>, |
491 | or: Self, |
492 | ) -> Self { |
493 | // Safety: The caller is responsible for upholding all invariants |
494 | unsafe { intrinsics::simd_gather(or, source, enable.to_int()) } |
495 | } |
496 | |
497 | /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. |
498 | /// If an index is out-of-bounds, the write is suppressed without panicking. |
499 | /// If two elements in the scattered vector would write to the same index |
500 | /// only the last element is guaranteed to actually be written. |
501 | /// |
502 | /// # Examples |
503 | /// ``` |
504 | /// # #![feature (portable_simd)] |
505 | /// # use core::simd::Simd; |
506 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
507 | /// let idxs = Simd::from_array([9, 3, 0, 0]); // Note the duplicate index. |
508 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
509 | /// |
510 | /// vals.scatter(&mut vec, idxs); // two logical writes means the last wins. |
511 | /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]); |
512 | /// ``` |
513 | #[inline ] |
514 | pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, N>) { |
515 | self.scatter_select(slice, Mask::splat(true), idxs) |
516 | } |
517 | |
518 | /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. |
519 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
520 | /// If an enabled index is out-of-bounds, the write is suppressed without panicking. |
521 | /// If two enabled elements in the scattered vector would write to the same index, |
522 | /// only the last element is guaranteed to actually be written. |
523 | /// |
524 | /// # Examples |
525 | /// ``` |
526 | /// # #![feature (portable_simd)] |
527 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
528 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
529 | /// # use simd::{Simd, Mask}; |
530 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
531 | /// let idxs = Simd::from_array([9, 3, 0, 0]); // Includes an out-of-bounds index |
532 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
533 | /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element |
534 | /// |
535 | /// vals.scatter_select(&mut vec, enable, idxs); // The last write is masked, thus omitted. |
536 | /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); |
537 | /// ``` |
538 | #[inline ] |
539 | pub fn scatter_select(self, slice: &mut [T], enable: Mask<isize, N>, idxs: Simd<usize, N>) { |
540 | let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len())); |
541 | // Safety: We have masked-off out-of-bounds indices. |
542 | unsafe { self.scatter_select_unchecked(slice, enable, idxs) } |
543 | } |
544 | |
545 | /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. |
546 | /// The mask `enable`s all `true` indices and disables all `false` indices. |
547 | /// If two enabled elements in the scattered vector would write to the same index, |
548 | /// only the last element is guaranteed to actually be written. |
549 | /// |
550 | /// # Safety |
551 | /// |
552 | /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*, |
553 | /// and may lead to memory corruption. |
554 | /// |
555 | /// # Examples |
556 | /// ``` |
557 | /// # #![feature (portable_simd)] |
558 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
559 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
560 | /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; |
561 | /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; |
562 | /// let idxs = Simd::from_array([9, 3, 0, 0]); |
563 | /// let vals = Simd::from_array([-27, 82, -41, 124]); |
564 | /// let enable = Mask::from_array([true, true, true, false]); // Masks the final index |
565 | /// // If this mask was used to scatter, it would be unsound. Let's fix that. |
566 | /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); |
567 | /// |
568 | /// // We have masked the OOB index, so it's safe to scatter now. |
569 | /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); } |
570 | /// // The second write to index 0 was masked, thus omitted. |
571 | /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); |
572 | /// ``` |
573 | /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html |
574 | #[inline ] |
575 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
576 | pub unsafe fn scatter_select_unchecked( |
577 | self, |
578 | slice: &mut [T], |
579 | enable: Mask<isize, N>, |
580 | idxs: Simd<usize, N>, |
581 | ) { |
582 | // Safety: This block works with *mut T derived from &mut 'a [T], |
583 | // which means it is delicate in Rust's borrowing model, circa 2021: |
584 | // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts! |
585 | // Even though this block is largely safe methods, it must be exactly this way |
586 | // to prevent invalidating the raw ptrs while they're live. |
587 | // Thus, entering this block requires all values to use being already ready: |
588 | // 0. idxs we want to write to, which are used to construct the mask. |
589 | // 1. enable, which depends on an initial &'a [T] and the idxs. |
590 | // 2. actual values to scatter (self). |
591 | // 3. &mut [T] which will become our base ptr. |
592 | unsafe { |
593 | // Now Entering ☢️ *mut T Zone |
594 | let base_ptr = Simd::<*mut T, N>::splat(slice.as_mut_ptr()); |
595 | // Ferris forgive me, I have done pointer arithmetic here. |
596 | let ptrs = base_ptr.wrapping_add(idxs); |
597 | // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah |
598 | self.scatter_select_ptr(ptrs, enable); |
599 | // Cleared ☢️ *mut T Zone |
600 | } |
601 | } |
602 | |
603 | /// Write pointers elementwise into a SIMD vector. |
604 | /// |
605 | /// # Safety |
606 | /// |
607 | /// Each write must satisfy the same conditions as [`core::ptr::write`]. |
608 | /// |
609 | /// # Example |
610 | /// ``` |
611 | /// # #![feature (portable_simd)] |
612 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
613 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
614 | /// # use simd::{Simd, ptr::SimdMutPtr}; |
615 | /// let mut values = [0; 4]; |
616 | /// let offset = Simd::from_array([3, 2, 1, 0]); |
617 | /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); |
618 | /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_ptr(ptrs); } |
619 | /// assert_eq!(values, [7, 5, 3, 6]); |
620 | /// ``` |
621 | #[inline ] |
622 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
623 | pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, N>) { |
624 | // Safety: The caller is responsible for upholding all invariants |
625 | unsafe { self.scatter_select_ptr(dest, Mask::splat(true)) } |
626 | } |
627 | |
628 | /// Conditionally write pointers elementwise into a SIMD vector. |
629 | /// The mask `enable`s all `true` pointers and disables all `false` pointers. |
630 | /// If a pointer is disabled, the write to its pointee is skipped. |
631 | /// |
632 | /// # Safety |
633 | /// |
634 | /// Enabled pointers must satisfy the same conditions as [`core::ptr::write`]. |
635 | /// |
636 | /// # Example |
637 | /// ``` |
638 | /// # #![feature (portable_simd)] |
639 | /// # #[cfg (feature = "as_crate" )] use core_simd::simd; |
640 | /// # #[cfg (not(feature = "as_crate" ))] use core::simd; |
641 | /// # use simd::{Mask, Simd, ptr::SimdMutPtr}; |
642 | /// let mut values = [0; 4]; |
643 | /// let offset = Simd::from_array([3, 2, 1, 0]); |
644 | /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); |
645 | /// let enable = Mask::from_array([true, true, false, false]); |
646 | /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_select_ptr(ptrs, enable); } |
647 | /// assert_eq!(values, [0, 0, 3, 6]); |
648 | /// ``` |
649 | #[inline ] |
650 | #[cfg_attr (miri, track_caller)] // even without panics, this helps for Miri backtraces |
651 | pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask<isize, N>) { |
652 | // Safety: The caller is responsible for upholding all invariants |
653 | unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) } |
654 | } |
655 | } |
656 | |
657 | impl<T, const N: usize> Copy for Simd<T, N> |
658 | where |
659 | LaneCount<N>: SupportedLaneCount, |
660 | T: SimdElement, |
661 | { |
662 | } |
663 | |
664 | impl<T, const N: usize> Clone for Simd<T, N> |
665 | where |
666 | LaneCount<N>: SupportedLaneCount, |
667 | T: SimdElement, |
668 | { |
669 | #[inline ] |
670 | fn clone(&self) -> Self { |
671 | *self |
672 | } |
673 | } |
674 | |
675 | impl<T, const N: usize> Default for Simd<T, N> |
676 | where |
677 | LaneCount<N>: SupportedLaneCount, |
678 | T: SimdElement + Default, |
679 | { |
680 | #[inline ] |
681 | fn default() -> Self { |
682 | Self::splat(T::default()) |
683 | } |
684 | } |
685 | |
686 | impl<T, const N: usize> PartialEq for Simd<T, N> |
687 | where |
688 | LaneCount<N>: SupportedLaneCount, |
689 | T: SimdElement + PartialEq, |
690 | { |
691 | #[inline ] |
692 | fn eq(&self, other: &Self) -> bool { |
693 | // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. |
694 | let mask = unsafe { |
695 | let tfvec: Simd<<T as SimdElement>::Mask, N> = intrinsics::simd_eq(*self, *other); |
696 | Mask::from_int_unchecked(tfvec) |
697 | }; |
698 | |
699 | // Two vectors are equal if all elements are equal when compared elementwise |
700 | mask.all() |
701 | } |
702 | |
703 | #[allow (clippy::partialeq_ne_impl)] |
704 | #[inline ] |
705 | fn ne(&self, other: &Self) -> bool { |
706 | // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. |
707 | let mask = unsafe { |
708 | let tfvec: Simd<<T as SimdElement>::Mask, N> = intrinsics::simd_ne(*self, *other); |
709 | Mask::from_int_unchecked(tfvec) |
710 | }; |
711 | |
712 | // Two vectors are non-equal if any elements are non-equal when compared elementwise |
713 | mask.any() |
714 | } |
715 | } |
716 | |
717 | impl<T, const N: usize> PartialOrd for Simd<T, N> |
718 | where |
719 | LaneCount<N>: SupportedLaneCount, |
720 | T: SimdElement + PartialOrd, |
721 | { |
722 | #[inline ] |
723 | fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { |
724 | // TODO use SIMD equality |
725 | self.to_array().partial_cmp(other.as_ref()) |
726 | } |
727 | } |
728 | |
729 | impl<T, const N: usize> Eq for Simd<T, N> |
730 | where |
731 | LaneCount<N>: SupportedLaneCount, |
732 | T: SimdElement + Eq, |
733 | { |
734 | } |
735 | |
736 | impl<T, const N: usize> Ord for Simd<T, N> |
737 | where |
738 | LaneCount<N>: SupportedLaneCount, |
739 | T: SimdElement + Ord, |
740 | { |
741 | #[inline ] |
742 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { |
743 | // TODO use SIMD equality |
744 | self.to_array().cmp(other.as_ref()) |
745 | } |
746 | } |
747 | |
748 | impl<T, const N: usize> core::hash::Hash for Simd<T, N> |
749 | where |
750 | LaneCount<N>: SupportedLaneCount, |
751 | T: SimdElement + core::hash::Hash, |
752 | { |
753 | #[inline ] |
754 | fn hash<H>(&self, state: &mut H) |
755 | where |
756 | H: core::hash::Hasher, |
757 | { |
758 | self.as_array().hash(state) |
759 | } |
760 | } |
761 | |
762 | // array references |
763 | impl<T, const N: usize> AsRef<[T; N]> for Simd<T, N> |
764 | where |
765 | LaneCount<N>: SupportedLaneCount, |
766 | T: SimdElement, |
767 | { |
768 | #[inline ] |
769 | fn as_ref(&self) -> &[T; N] { |
770 | self.as_array() |
771 | } |
772 | } |
773 | |
774 | impl<T, const N: usize> AsMut<[T; N]> for Simd<T, N> |
775 | where |
776 | LaneCount<N>: SupportedLaneCount, |
777 | T: SimdElement, |
778 | { |
779 | #[inline ] |
780 | fn as_mut(&mut self) -> &mut [T; N] { |
781 | self.as_mut_array() |
782 | } |
783 | } |
784 | |
785 | // slice references |
786 | impl<T, const N: usize> AsRef<[T]> for Simd<T, N> |
787 | where |
788 | LaneCount<N>: SupportedLaneCount, |
789 | T: SimdElement, |
790 | { |
791 | #[inline ] |
792 | fn as_ref(&self) -> &[T] { |
793 | self.as_array() |
794 | } |
795 | } |
796 | |
797 | impl<T, const N: usize> AsMut<[T]> for Simd<T, N> |
798 | where |
799 | LaneCount<N>: SupportedLaneCount, |
800 | T: SimdElement, |
801 | { |
802 | #[inline ] |
803 | fn as_mut(&mut self) -> &mut [T] { |
804 | self.as_mut_array() |
805 | } |
806 | } |
807 | |
808 | // vector/array conversion |
809 | impl<T, const N: usize> From<[T; N]> for Simd<T, N> |
810 | where |
811 | LaneCount<N>: SupportedLaneCount, |
812 | T: SimdElement, |
813 | { |
814 | #[inline ] |
815 | fn from(array: [T; N]) -> Self { |
816 | Self::from_array(array) |
817 | } |
818 | } |
819 | |
820 | impl<T, const N: usize> From<Simd<T, N>> for [T; N] |
821 | where |
822 | LaneCount<N>: SupportedLaneCount, |
823 | T: SimdElement, |
824 | { |
825 | #[inline ] |
826 | fn from(vector: Simd<T, N>) -> Self { |
827 | vector.to_array() |
828 | } |
829 | } |
830 | |
831 | impl<T, const N: usize> TryFrom<&[T]> for Simd<T, N> |
832 | where |
833 | LaneCount<N>: SupportedLaneCount, |
834 | T: SimdElement, |
835 | { |
836 | type Error = core::array::TryFromSliceError; |
837 | |
838 | #[inline ] |
839 | fn try_from(slice: &[T]) -> Result<Self, core::array::TryFromSliceError> { |
840 | Ok(Self::from_array(slice.try_into()?)) |
841 | } |
842 | } |
843 | |
844 | impl<T, const N: usize> TryFrom<&mut [T]> for Simd<T, N> |
845 | where |
846 | LaneCount<N>: SupportedLaneCount, |
847 | T: SimdElement, |
848 | { |
849 | type Error = core::array::TryFromSliceError; |
850 | |
851 | #[inline ] |
852 | fn try_from(slice: &mut [T]) -> Result<Self, core::array::TryFromSliceError> { |
853 | Ok(Self::from_array(slice.try_into()?)) |
854 | } |
855 | } |
856 | |
// Private module holding the `Sealed` trait. The trait itself is `pub`, but the module that
// contains it is not.
mod sealed {
    // Empty marker trait, used below as a supertrait of `SimdElement`.
    pub trait Sealed {}
}
use self::sealed::Sealed;
861 | |
862 | /// Marker trait for types that may be used as SIMD vector elements. |
863 | /// |
864 | /// # Safety |
865 | /// This trait, when implemented, asserts the compiler can monomorphize |
866 | /// `#[repr(simd)]` structs with the marked type as an element. |
867 | /// Strictly, it is valid to impl if the vector will not be miscompiled. |
868 | /// Practically, it is user-unfriendly to impl it if the vector won't compile, |
869 | /// even when no soundness guarantees are broken by allowing the user to try. |
870 | pub unsafe trait SimdElement: Sealed + Copy { |
871 | /// The mask element type corresponding to this element type. |
872 | type Mask: MaskElement; |
873 | } |
874 | |
875 | impl Sealed for u8 {} |
876 | |
877 | // Safety: u8 is a valid SIMD element type, and is supported by this API |
878 | unsafe impl SimdElement for u8 { |
879 | type Mask = i8; |
880 | } |
881 | |
882 | impl Sealed for u16 {} |
883 | |
884 | // Safety: u16 is a valid SIMD element type, and is supported by this API |
885 | unsafe impl SimdElement for u16 { |
886 | type Mask = i16; |
887 | } |
888 | |
889 | impl Sealed for u32 {} |
890 | |
891 | // Safety: u32 is a valid SIMD element type, and is supported by this API |
892 | unsafe impl SimdElement for u32 { |
893 | type Mask = i32; |
894 | } |
895 | |
896 | impl Sealed for u64 {} |
897 | |
898 | // Safety: u64 is a valid SIMD element type, and is supported by this API |
899 | unsafe impl SimdElement for u64 { |
900 | type Mask = i64; |
901 | } |
902 | |
903 | impl Sealed for usize {} |
904 | |
905 | // Safety: usize is a valid SIMD element type, and is supported by this API |
906 | unsafe impl SimdElement for usize { |
907 | type Mask = isize; |
908 | } |
909 | |
910 | impl Sealed for i8 {} |
911 | |
912 | // Safety: i8 is a valid SIMD element type, and is supported by this API |
913 | unsafe impl SimdElement for i8 { |
914 | type Mask = i8; |
915 | } |
916 | |
917 | impl Sealed for i16 {} |
918 | |
919 | // Safety: i16 is a valid SIMD element type, and is supported by this API |
920 | unsafe impl SimdElement for i16 { |
921 | type Mask = i16; |
922 | } |
923 | |
924 | impl Sealed for i32 {} |
925 | |
926 | // Safety: i32 is a valid SIMD element type, and is supported by this API |
927 | unsafe impl SimdElement for i32 { |
928 | type Mask = i32; |
929 | } |
930 | |
931 | impl Sealed for i64 {} |
932 | |
933 | // Safety: i64 is a valid SIMD element type, and is supported by this API |
934 | unsafe impl SimdElement for i64 { |
935 | type Mask = i64; |
936 | } |
937 | |
938 | impl Sealed for isize {} |
939 | |
940 | // Safety: isize is a valid SIMD element type, and is supported by this API |
941 | unsafe impl SimdElement for isize { |
942 | type Mask = isize; |
943 | } |
944 | |
945 | impl Sealed for f32 {} |
946 | |
947 | // Safety: f32 is a valid SIMD element type, and is supported by this API |
948 | unsafe impl SimdElement for f32 { |
949 | type Mask = i32; |
950 | } |
951 | |
952 | impl Sealed for f64 {} |
953 | |
954 | // Safety: f64 is a valid SIMD element type, and is supported by this API |
955 | unsafe impl SimdElement for f64 { |
956 | type Mask = i64; |
957 | } |
958 | |
959 | impl<T> Sealed for *const T {} |
960 | |
961 | // Safety: (thin) const pointers are valid SIMD element types, and are supported by this API |
962 | // |
963 | // Fat pointers may be supported in the future. |
964 | unsafe impl<T> SimdElement for *const T |
965 | where |
966 | T: core::ptr::Pointee<Metadata = ()>, |
967 | { |
968 | type Mask = isize; |
969 | } |
970 | |
971 | impl<T> Sealed for *mut T {} |
972 | |
973 | // Safety: (thin) mut pointers are valid SIMD element types, and are supported by this API |
974 | // |
975 | // Fat pointers may be supported in the future. |
976 | unsafe impl<T> SimdElement for *mut T |
977 | where |
978 | T: core::ptr::Pointee<Metadata = ()>, |
979 | { |
980 | type Mask = isize; |
981 | } |
982 | |