| 1 | use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN}; |
| 2 | use arrayref::{array_mut_ref, array_ref}; |
| 3 | |
// Upper bound on the value `Platform::simd_degree()` may report in this build.
//
// The four cfg predicates below are mutually exclusive and together cover
// every target, so exactly one definition is ever compiled:
//   - x86 / x86_64 with `blake3_avx512_ffi`    -> 16
//   - x86 / x86_64 without `blake3_avx512_ffi` -> 8
//   - any other architecture with `blake3_neon` -> 4
//   - everything else                           -> 1
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    blake3_avx512_ffi
))]
pub const MAX_SIMD_DEGREE: usize = 16;
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(blake3_avx512_ffi)
))]
pub const MAX_SIMD_DEGREE: usize = 8;
#[cfg(all(
    not(any(target_arch = "x86", target_arch = "x86_64")),
    blake3_neon
))]
pub const MAX_SIMD_DEGREE: usize = 4;
#[cfg(not(any(
    target_arch = "x86",
    target_arch = "x86_64",
    blake3_neon
)))]
pub const MAX_SIMD_DEGREE: usize = 1;
| 19 | |
| 20 | // There are some places where we want a static size that's equal to the |
| 21 | // MAX_SIMD_DEGREE, but also at least 2. Constant contexts aren't currently |
| 22 | // allowed to use cmp::max, so we have to hardcode this additional constant |
| 23 | // value. Get rid of this once cmp::max is a const fn. |
| 24 | cfg_if::cfg_if! { |
| 25 | if #[cfg(any(target_arch = "x86" , target_arch = "x86_64" ))] { |
| 26 | cfg_if::cfg_if! { |
| 27 | if #[cfg(blake3_avx512_ffi)] { |
| 28 | pub const MAX_SIMD_DEGREE_OR_2: usize = 16; |
| 29 | } else { |
| 30 | pub const MAX_SIMD_DEGREE_OR_2: usize = 8; |
| 31 | } |
| 32 | } |
| 33 | } else if #[cfg(blake3_neon)] { |
| 34 | pub const MAX_SIMD_DEGREE_OR_2: usize = 4; |
| 35 | } else { |
| 36 | pub const MAX_SIMD_DEGREE_OR_2: usize = 2; |
| 37 | } |
| 38 | } |
| 39 | |
/// Identifies which implementation the `Platform` methods dispatch to.
///
/// Only `Portable` exists unconditionally. The remaining variants are
/// compiled in only for the target architecture / build cfg named on each.
#[derive(Debug, Clone, Copy)]
pub enum Platform {
    /// Always available.
    Portable,
    /// x86 / x86_64 only.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE2,
    /// x86 / x86_64 only.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE41,
    /// x86 / x86_64 only.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX2,
    /// x86 / x86_64 only, and only when built with `blake3_avx512_ffi`.
    #[cfg(all(
        blake3_avx512_ffi,
        any(target_arch = "x86", target_arch = "x86_64")
    ))]
    AVX512,
    /// Only when built with `blake3_neon`.
    #[cfg(blake3_neon)]
    NEON,
}
| 55 | |
| 56 | impl Platform { |
| 57 | #[allow (unreachable_code)] |
| 58 | pub fn detect() -> Self { |
| 59 | #[cfg (miri)] |
| 60 | { |
| 61 | return Platform::Portable; |
| 62 | } |
| 63 | |
| 64 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 65 | { |
| 66 | #[cfg (blake3_avx512_ffi)] |
| 67 | { |
| 68 | if avx512_detected() { |
| 69 | return Platform::AVX512; |
| 70 | } |
| 71 | } |
| 72 | if avx2_detected() { |
| 73 | return Platform::AVX2; |
| 74 | } |
| 75 | if sse41_detected() { |
| 76 | return Platform::SSE41; |
| 77 | } |
| 78 | if sse2_detected() { |
| 79 | return Platform::SSE2; |
| 80 | } |
| 81 | } |
| 82 | // We don't use dynamic feature detection for NEON. If the "neon" |
| 83 | // feature is on, NEON is assumed to be supported. |
| 84 | #[cfg (blake3_neon)] |
| 85 | { |
| 86 | return Platform::NEON; |
| 87 | } |
| 88 | Platform::Portable |
| 89 | } |
| 90 | |
| 91 | pub fn simd_degree(&self) -> usize { |
| 92 | let degree = match self { |
| 93 | Platform::Portable => 1, |
| 94 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 95 | Platform::SSE2 => 4, |
| 96 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 97 | Platform::SSE41 => 4, |
| 98 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 99 | Platform::AVX2 => 8, |
| 100 | #[cfg (blake3_avx512_ffi)] |
| 101 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 102 | Platform::AVX512 => 16, |
| 103 | #[cfg (blake3_neon)] |
| 104 | Platform::NEON => 4, |
| 105 | }; |
| 106 | debug_assert!(degree <= MAX_SIMD_DEGREE); |
| 107 | degree |
| 108 | } |
| 109 | |
| 110 | pub fn compress_in_place( |
| 111 | &self, |
| 112 | cv: &mut CVWords, |
| 113 | block: &[u8; BLOCK_LEN], |
| 114 | block_len: u8, |
| 115 | counter: u64, |
| 116 | flags: u8, |
| 117 | ) { |
| 118 | match self { |
| 119 | Platform::Portable => portable::compress_in_place(cv, block, block_len, counter, flags), |
| 120 | // Safe because detect() checked for platform support. |
| 121 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 122 | Platform::SSE2 => unsafe { |
| 123 | crate::sse2::compress_in_place(cv, block, block_len, counter, flags) |
| 124 | }, |
| 125 | // Safe because detect() checked for platform support. |
| 126 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 127 | Platform::SSE41 | Platform::AVX2 => unsafe { |
| 128 | crate::sse41::compress_in_place(cv, block, block_len, counter, flags) |
| 129 | }, |
| 130 | // Safe because detect() checked for platform support. |
| 131 | #[cfg (blake3_avx512_ffi)] |
| 132 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 133 | Platform::AVX512 => unsafe { |
| 134 | crate::avx512::compress_in_place(cv, block, block_len, counter, flags) |
| 135 | }, |
| 136 | // No NEON compress_in_place() implementation yet. |
| 137 | #[cfg (blake3_neon)] |
| 138 | Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | pub fn compress_xof( |
| 143 | &self, |
| 144 | cv: &CVWords, |
| 145 | block: &[u8; BLOCK_LEN], |
| 146 | block_len: u8, |
| 147 | counter: u64, |
| 148 | flags: u8, |
| 149 | ) -> [u8; 64] { |
| 150 | match self { |
| 151 | Platform::Portable => portable::compress_xof(cv, block, block_len, counter, flags), |
| 152 | // Safe because detect() checked for platform support. |
| 153 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 154 | Platform::SSE2 => unsafe { |
| 155 | crate::sse2::compress_xof(cv, block, block_len, counter, flags) |
| 156 | }, |
| 157 | // Safe because detect() checked for platform support. |
| 158 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 159 | Platform::SSE41 | Platform::AVX2 => unsafe { |
| 160 | crate::sse41::compress_xof(cv, block, block_len, counter, flags) |
| 161 | }, |
| 162 | // Safe because detect() checked for platform support. |
| 163 | #[cfg (blake3_avx512_ffi)] |
| 164 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 165 | Platform::AVX512 => unsafe { |
| 166 | crate::avx512::compress_xof(cv, block, block_len, counter, flags) |
| 167 | }, |
| 168 | // No NEON compress_xof() implementation yet. |
| 169 | #[cfg (blake3_neon)] |
| 170 | Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | // IMPLEMENTATION NOTE |
| 175 | // =================== |
| 176 | // hash_many() applies two optimizations. The critically important |
| 177 | // optimization is the high-performance parallel SIMD hashing mode, |
| 178 | // described in detail in the spec. This more than doubles throughput per |
| 179 | // thread. Another optimization is keeping the state vectors transposed |
| 180 | // from block to block within a chunk. When state vectors are transposed |
| 181 | // after every block, there's a small but measurable performance loss. |
| 182 | // Compressing chunks with a dedicated loop avoids this. |
| 183 | |
| 184 | pub fn hash_many<const N: usize>( |
| 185 | &self, |
| 186 | inputs: &[&[u8; N]], |
| 187 | key: &CVWords, |
| 188 | counter: u64, |
| 189 | increment_counter: IncrementCounter, |
| 190 | flags: u8, |
| 191 | flags_start: u8, |
| 192 | flags_end: u8, |
| 193 | out: &mut [u8], |
| 194 | ) { |
| 195 | match self { |
| 196 | Platform::Portable => portable::hash_many( |
| 197 | inputs, |
| 198 | key, |
| 199 | counter, |
| 200 | increment_counter, |
| 201 | flags, |
| 202 | flags_start, |
| 203 | flags_end, |
| 204 | out, |
| 205 | ), |
| 206 | // Safe because detect() checked for platform support. |
| 207 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 208 | Platform::SSE2 => unsafe { |
| 209 | crate::sse2::hash_many( |
| 210 | inputs, |
| 211 | key, |
| 212 | counter, |
| 213 | increment_counter, |
| 214 | flags, |
| 215 | flags_start, |
| 216 | flags_end, |
| 217 | out, |
| 218 | ) |
| 219 | }, |
| 220 | // Safe because detect() checked for platform support. |
| 221 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 222 | Platform::SSE41 => unsafe { |
| 223 | crate::sse41::hash_many( |
| 224 | inputs, |
| 225 | key, |
| 226 | counter, |
| 227 | increment_counter, |
| 228 | flags, |
| 229 | flags_start, |
| 230 | flags_end, |
| 231 | out, |
| 232 | ) |
| 233 | }, |
| 234 | // Safe because detect() checked for platform support. |
| 235 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 236 | Platform::AVX2 => unsafe { |
| 237 | crate::avx2::hash_many( |
| 238 | inputs, |
| 239 | key, |
| 240 | counter, |
| 241 | increment_counter, |
| 242 | flags, |
| 243 | flags_start, |
| 244 | flags_end, |
| 245 | out, |
| 246 | ) |
| 247 | }, |
| 248 | // Safe because detect() checked for platform support. |
| 249 | #[cfg (blake3_avx512_ffi)] |
| 250 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 251 | Platform::AVX512 => unsafe { |
| 252 | crate::avx512::hash_many( |
| 253 | inputs, |
| 254 | key, |
| 255 | counter, |
| 256 | increment_counter, |
| 257 | flags, |
| 258 | flags_start, |
| 259 | flags_end, |
| 260 | out, |
| 261 | ) |
| 262 | }, |
| 263 | // Assumed to be safe if the "neon" feature is on. |
| 264 | #[cfg (blake3_neon)] |
| 265 | Platform::NEON => unsafe { |
| 266 | crate::neon::hash_many( |
| 267 | inputs, |
| 268 | key, |
| 269 | counter, |
| 270 | increment_counter, |
| 271 | flags, |
| 272 | flags_start, |
| 273 | flags_end, |
| 274 | out, |
| 275 | ) |
| 276 | }, |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | pub fn xof_many( |
| 281 | &self, |
| 282 | cv: &CVWords, |
| 283 | block: &[u8; BLOCK_LEN], |
| 284 | block_len: u8, |
| 285 | mut counter: u64, |
| 286 | flags: u8, |
| 287 | out: &mut [u8], |
| 288 | ) { |
| 289 | debug_assert_eq!(0, out.len() % BLOCK_LEN, "whole blocks only" ); |
| 290 | if out.is_empty() { |
| 291 | // The current assembly implementation always outputs at least 1 block. |
| 292 | return; |
| 293 | } |
| 294 | match self { |
| 295 | // Safe because detect() checked for platform support. |
| 296 | #[cfg (blake3_avx512_ffi)] |
| 297 | #[cfg (unix)] |
| 298 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 299 | Platform::AVX512 => unsafe { |
| 300 | crate::avx512::xof_many(cv, block, block_len, counter, flags, out) |
| 301 | }, |
| 302 | _ => { |
| 303 | // For platforms without an optimized xof_many, fall back to a loop over |
| 304 | // compress_xof. This is still faster than portable code. |
| 305 | for out_block in out.chunks_exact_mut(BLOCK_LEN) { |
| 306 | // TODO: Use array_chunks_mut here once that's stable. |
| 307 | let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap(); |
| 308 | *out_array = self.compress_xof(cv, block, block_len, counter, flags); |
| 309 | counter += 1; |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | // Explicit platform constructors, for benchmarks. |
| 316 | |
| 317 | pub fn portable() -> Self { |
| 318 | Self::Portable |
| 319 | } |
| 320 | |
| 321 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 322 | pub fn sse2() -> Option<Self> { |
| 323 | if sse2_detected() { |
| 324 | Some(Self::SSE2) |
| 325 | } else { |
| 326 | None |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 331 | pub fn sse41() -> Option<Self> { |
| 332 | if sse41_detected() { |
| 333 | Some(Self::SSE41) |
| 334 | } else { |
| 335 | None |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 340 | pub fn avx2() -> Option<Self> { |
| 341 | if avx2_detected() { |
| 342 | Some(Self::AVX2) |
| 343 | } else { |
| 344 | None |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | #[cfg (blake3_avx512_ffi)] |
| 349 | #[cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 350 | pub fn avx512() -> Option<Self> { |
| 351 | if avx512_detected() { |
| 352 | Some(Self::AVX512) |
| 353 | } else { |
| 354 | None |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | #[cfg (blake3_neon)] |
| 359 | pub fn neon() -> Option<Self> { |
| 360 | // Assumed to be safe if the "neon" feature is on. |
| 361 | Some(Self::NEON) |
| 362 | } |
| 363 | } |
| 364 | |
// Note that AVX-512 is divided into multiple featuresets, and we use two of
// them, F and VL.
/// Reports whether both AVX-512F and AVX-512VL may be used.
///
/// Always `false` under Miri or with the testing-only `no_avx512` feature.
/// Otherwise `true` if both target features are enabled at compile time, or,
/// when the `std` feature is on, if both are detected at runtime.
#[cfg(all(
    blake3_avx512_ffi,
    any(target_arch = "x86", target_arch = "x86_64")
))]
#[inline(always)]
// With `std` enabled the runtime probe returns unconditionally, which leaves
// the trailing `false` unreachable.
#[allow(unreachable_code)]
pub fn avx512_detected() -> bool {
    // Miri, or the testing-only short-circuit.
    if cfg!(any(miri, feature = "no_avx512")) {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native.
    if cfg!(all(target_feature = "avx512f", target_feature = "avx512vl")) {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl");
    }
    false
}
| 394 | |
/// Reports whether AVX2 may be used.
///
/// Always `false` under Miri or with the testing-only `no_avx2` feature.
/// Otherwise `true` if the target feature is enabled at compile time, or,
/// when the `std` feature is on, if it is detected at runtime.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std` enabled the runtime probe returns unconditionally, which leaves
// the trailing `false` unreachable.
#[allow(unreachable_code)]
pub fn avx2_detected() -> bool {
    // Miri, or the testing-only short-circuit.
    if cfg!(any(miri, feature = "no_avx2")) {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native.
    if cfg!(target_feature = "avx2") {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("avx2");
    }
    false
}
| 421 | |
/// Reports whether SSE4.1 may be used.
///
/// Always `false` under Miri or with the testing-only `no_sse41` feature.
/// Otherwise `true` if the target feature is enabled at compile time, or,
/// when the `std` feature is on, if it is detected at runtime.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std` enabled the runtime probe returns unconditionally, which leaves
// the trailing `false` unreachable.
#[allow(unreachable_code)]
pub fn sse41_detected() -> bool {
    // Miri, or the testing-only short-circuit.
    if cfg!(any(miri, feature = "no_sse41")) {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native.
    if cfg!(target_feature = "sse4.1") {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("sse4.1");
    }
    false
}
| 448 | |
/// Reports whether SSE2 may be used.
///
/// Always `false` under Miri or with the testing-only `no_sse2` feature.
/// Otherwise `true` if the target feature is enabled at compile time, or,
/// when the `std` feature is on, if it is detected at runtime.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std` enabled the runtime probe returns unconditionally, which leaves
// the trailing `false` unreachable.
#[allow(unreachable_code)]
pub fn sse2_detected() -> bool {
    // Miri, or the testing-only short-circuit.
    if cfg!(any(miri, feature = "no_sse2")) {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native.
    if cfg!(target_feature = "sse2") {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("sse2");
    }
    false
}
| 475 | |
/// Decodes 32 bytes into eight `u32` words, reading each consecutive group of
/// four bytes as one little-endian word.
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
    let mut words = [0u32; 8];
    // 32 bytes split into exactly eight 4-byte groups, one per output word.
    for (word, quad) in words.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]);
    }
    words
}
| 489 | |
/// Decodes 64 bytes into sixteen `u32` words, reading each consecutive group
/// of four bytes as one little-endian word.
#[inline(always)]
pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] {
    let mut words = [0u32; 16];
    // 64 bytes split into exactly sixteen 4-byte groups, one per output word.
    for (word, quad) in words.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]);
    }
    words
}
| 511 | |
/// Encodes eight `u32` words as 32 bytes, writing each word as four
/// little-endian bytes in order.
#[inline(always)]
pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
    let mut bytes = [0u8; 32];
    // Each 4-byte group of the output receives one word's little-endian bytes.
    for (quad, word) in bytes.chunks_exact_mut(4).zip(words.iter()) {
        quad.copy_from_slice(&word.to_le_bytes());
    }
    bytes
}
| 525 | |
/// Encodes sixteen `u32` words as 64 bytes, writing each word as four
/// little-endian bytes in order.
#[inline(always)]
pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
    let mut bytes = [0u8; 64];
    // Each 4-byte group of the output receives one word's little-endian bytes.
    for (quad, word) in bytes.chunks_exact_mut(4).zip(words.iter()) {
        quad.copy_from_slice(&word.to_le_bytes());
    }
    bytes
}
| 547 | |