//! Contains the compression attribute definition
//! and methods to compress and decompress data.


// private modules make non-breaking changes easier
mod zip;
mod rle;
mod piz;
mod pxr24;
mod b44;


use std::convert::TryInto;
use std::mem::size_of;
use half::f16;
use crate::meta::attribute::{IntegerBounds, SampleType, ChannelList};
use crate::error::{Result, Error, usize_to_i32};
use crate::meta::header::Header;


/// A byte vector.
pub type ByteVec = Vec<u8>;

/// A byte slice.
pub type Bytes<'s> = &'s [u8];

/// Specifies which compression method to use.
/// Use uncompressed data for fastest loading and writing speeds.
/// Use RLE compression for fast loading and writing with slight memory savings.
/// Use ZIP compression for slow processing with large memory savings.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Compression {

    /// Store uncompressed values.
    /// Produces large files that can be read and written very quickly.
    /// Consider using RLE instead, as it provides some compression with almost equivalent speed.
    Uncompressed,

    /// Produces slightly smaller files
    /// that can still be read and written rather quickly.
    /// The compressed file size is usually between 60 and 75 percent of the uncompressed size.
    /// Works best for images with large flat areas, such as masks and abstract graphics.
    /// This compression method is lossless.
    RLE,

    /// Uses ZIP compression to compress each line. Slowly produces small images
    /// which can be read with moderate speed. This compression method is lossless.
    /// Might be slightly faster but larger than `ZIP16`.
    ZIP1, // TODO ZIP { individual_lines: bool, compression_level: Option<u8> } // TODO specify zip compression level?

    /// Uses ZIP compression to compress blocks of 16 lines. Slowly produces small images
    /// which can be read with moderate speed. This compression method is lossless.
    /// Might be slightly slower but smaller than `ZIP1`.
    ZIP16, // TODO collapse with ZIP1

    /// PIZ compression works well for noisy and natural images. Works better with larger tiles.
    /// Only supported for flat images, but not for deep data.
    /// This compression method is lossless.
    // A wavelet transform is applied to the pixel data, and the result is Huffman-
    // encoded. This scheme tends to provide the best compression ratio for the types of
    // images that are typically processed at Industrial Light & Magic. Files are
    // compressed and decompressed at roughly the same speed. For photographic
    // images with film grain, the files are reduced to between 35 and 55 percent of their
    // uncompressed size.
    // PIZ compression works well for scan-line based files, and also for tiled files with
    // large tiles, but small tiles do not shrink much. (PIZ-compressed data start with a
    // relatively long header; if the input to the compressor is short, adding the header
    // tends to offset any size reduction of the input.)
    PIZ,

    /// Like `ZIP1`, but reduces precision of `f32` images to `f24`.
    /// Therefore, this is lossless compression for `f16` and `u32` data, lossy compression for `f32` data.
    /// This compression method works well for depth
    /// buffers and similar images, where the possible range of values is very large, but
    /// where full 32-bit floating-point accuracy is not necessary. Rounding improves
    /// compression significantly by eliminating the pixels' 8 least significant bits, which
    /// tend to be very noisy, and therefore difficult to compress.
    /// This produces really small image files. Only supported for flat images, not for deep data.
    // After reducing 32-bit floating-point data to 24 bits by rounding (while leaving 16-bit
    // floating-point data unchanged), differences between horizontally adjacent pixels
    // are compressed with zlib, similar to ZIP. PXR24 compression preserves image
    // channels of type HALF and UINT exactly, but the relative error of FLOAT data
    // increases to about ???.
    PXR24, // TODO specify zip compression level?

    /// This is a lossy compression method for f16 images.
    /// It's the predecessor of the `B44A` compression,
    /// which has improved compression rates for uniformly colored areas.
    /// You should probably use `B44A` instead of the plain `B44`.
    ///
    /// Only supported for flat images, not for deep data.
    // lossy 4-by-4 pixel block compression,
    // flat fields are compressed more
    // Channels of type HALF are split into blocks of four by four pixels or 32 bytes. Each
    // block is then packed into 14 bytes, reducing the data to 44 percent of their
    // uncompressed size. When B44 compression is applied to RGB images in
    // combination with luminance/chroma encoding (see below), the size of the
    // compressed pixels is about 22 percent of the size of the original RGB data.
    // Channels of type UINT or FLOAT are not compressed.
    // Decoding is fast enough to allow real-time playback of B44-compressed OpenEXR
    // image sequences on commodity hardware.
    // The size of a B44-compressed file depends on the number of pixels in the image,
    // but not on the data in the pixels. All images with the same resolution and the same
    // set of channels have the same size. This can be advantageous for systems that
    // support real-time playback of image sequences; the predictable file size makes it
    // easier to allocate space on storage media efficiently.
    // B44 compression is only supported for flat images.
    B44, // TODO B44 { optimize_uniform_areas: bool }

    /// This is a lossy compression method for f16 images.
    /// All f32 and u32 channels will be stored without compression.
    /// All the f16 pixels are divided into 4x4 blocks.
    /// Each block is then compressed as a whole.
    ///
    /// The 32 bytes of a block will require only ~14 bytes after compression,
    /// independent of the actual pixel contents. With chroma subsampling,
    /// a block will be compressed to ~7 bytes.
    /// Uniformly colored blocks will be compressed to ~3 bytes.
    ///
    /// The 512 bytes of an f32 block will not be compressed at all.
    ///
    /// Should be fast enough for realtime playback.
    /// Only supported for flat images, not for deep data.
    B44A, // TODO collapse with B44

    /// __This lossy compression is not yet supported by this implementation.__
    // lossy DCT based compression, in blocks
    // of 32 scanlines. More efficient for partial buffer access.
    DWAA(Option<f32>), // TODO does this have a default value? make this non optional? default Compression Level setting is 45.0

    /// __This lossy compression is not yet supported by this implementation.__
    // lossy DCT based compression, in blocks
    // of 256 scanlines. More efficient space
    // wise and faster to decode full frames
    // than DWAA_COMPRESSION.
    DWAB(Option<f32>), // TODO collapse with B44. default Compression Level setting is 45.0
}

impl std::fmt::Display for Compression {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "{} compression", match self {
            Compression::Uncompressed => "no",
            Compression::RLE => "rle",
            Compression::ZIP1 => "zip line",
            Compression::ZIP16 => "zip block",
            Compression::B44 => "b44",
            Compression::B44A => "b44a",
            Compression::DWAA(_) => "dwaa",
            Compression::DWAB(_) => "dwab",
            Compression::PIZ => "piz",
            Compression::PXR24 => "pxr24",
        })
    }
}
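
// Illustrative example (added for clarity, not part of the original source):
// the `Display` impl above yields short human-readable names, which end up in
// error messages such as "pixels cannot be compressed (zip block compression)".
#[cfg(test)]
mod compression_display_example {
    use super::Compression;

    #[test]
    fn formats_human_readable_name(){
        assert_eq!(format!("{}", Compression::ZIP16), "zip block compression");
        assert_eq!(format!("{}", Compression::Uncompressed), "no compression");
    }
}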


impl Compression {

    /// Compress the image section of bytes.
    pub fn compress_image_section(self, header: &Header, uncompressed_native_endian: ByteVec, pixel_section: IntegerBounds) -> Result<ByteVec> {
        let max_tile_size = header.max_block_pixel_size();

        assert!(pixel_section.validate(Some(max_tile_size)).is_ok(), "compress tile coordinate bug");
        if header.deep { assert!(self.supports_deep_data()) }

        use self::Compression::*;
        let compressed_little_endian = match self {
            Uncompressed => {
                return Ok(convert_current_to_little_endian(
                    uncompressed_native_endian, &header.channels, pixel_section
                ))
            },

            // we need to clone here, because we might have to fall back to the uncompressed data later (when the compressed data is larger than the raw data)
            ZIP16 => zip::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
            ZIP1 => zip::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
            RLE => rle::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
            PIZ => piz::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section),
            PXR24 => pxr24::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section),
            B44 => b44::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section, false),
            B44A => b44::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section, true),
            _ => return Err(Error::unsupported(format!("yet unimplemented compression method: {}", self)))
        };

        let compressed_little_endian = compressed_little_endian.map_err(|_|
            Error::invalid(format!("pixels cannot be compressed ({})", self))
        )?;

        if self == Uncompressed || compressed_little_endian.len() < uncompressed_native_endian.len() {
            // only write the compressed data if it actually is smaller than the raw data
            Ok(compressed_little_endian)
        }
        else {
            // if compression did not pay off, manually convert the uncompressed data instead
            Ok(convert_current_to_little_endian(uncompressed_native_endian, &header.channels, pixel_section))
        }
    }

    /// Decompress the image section of bytes.
    pub fn decompress_image_section(self, header: &Header, compressed: ByteVec, pixel_section: IntegerBounds, pedantic: bool) -> Result<ByteVec> {
        let max_tile_size = header.max_block_pixel_size();

        assert!(pixel_section.validate(Some(max_tile_size)).is_ok(), "decompress tile coordinate bug");
        if header.deep { assert!(self.supports_deep_data()) }

        let expected_byte_size = pixel_section.size.area() * header.channels.bytes_per_pixel; // FIXME this needs to account for subsampling anywhere

        // note: this is always true when self == Uncompressed
        if compressed.len() == expected_byte_size {
            // the compressed data was larger than the raw data, so the smaller raw data has been written
            Ok(convert_little_endian_to_current(compressed, &header.channels, pixel_section))
        }
        else {
            use self::Compression::*;
            let bytes = match self {
                Uncompressed => Ok(convert_little_endian_to_current(compressed, &header.channels, pixel_section)),
                ZIP16 => zip::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                ZIP1 => zip::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                RLE => rle::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                PIZ => piz::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                PXR24 => pxr24::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                B44 | B44A => b44::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
                _ => return Err(Error::unsupported(format!("yet unimplemented compression method: {}", self)))
            };

            // map all errors to compression errors
            let bytes = bytes
                .map_err(|decompression_error| match decompression_error {
                    Error::NotSupported(message) =>
                        Error::unsupported(format!("yet unimplemented compression special case ({})", message)),

                    error => Error::invalid(format!(
                        "compressed {:?} data ({})",
                        self, error.to_string()
                    )),
                })?;

            if bytes.len() != expected_byte_size {
                Err(Error::invalid("decompressed data"))
            }

            else { Ok(bytes) }
        }
    }

    /// For scan line images and deep scan line images, one or more scan lines may be
    /// stored together as a scan line block. The number of scan lines per block
    /// depends on how the pixel data are compressed.
    pub fn scan_lines_per_block(self) -> usize {
        use self::Compression::*;
        match self {
            Uncompressed | RLE | ZIP1 => 1,
            ZIP16 | PXR24 => 16,
            PIZ | B44 | B44A | DWAA(_) => 32,
            DWAB(_) => 256,
        }
    }

    /// Deep data can only be stored uncompressed, or compressed with RLE or per-line ZIP compression.
    pub fn supports_deep_data(self) -> bool {
        use self::Compression::*;
        match self {
            Uncompressed | RLE | ZIP1 => true,
            _ => false,
        }
    }

    /// Most compression methods will reconstruct the exact pixel bytes,
    /// but some might throw away unimportant data for specific types of samples.
    pub fn is_lossless_for(self, sample_type: SampleType) -> bool {
        use self::Compression::*;
        match self {
            PXR24 => sample_type != SampleType::F32, // pxr reduces f32 to f24
            B44 | B44A => sample_type != SampleType::F16, // b44 only compresses f16 values, others are left uncompressed
            Uncompressed | RLE | ZIP1 | ZIP16 | PIZ => true,
            DWAB(_) | DWAA(_) => false,
        }
    }

    /// Most compression methods will reconstruct the exact pixel bytes,
    /// but some might throw away unimportant data in some cases.
    pub fn may_loose_data(self) -> bool {
        use self::Compression::*;
        match self {
            Uncompressed | RLE | ZIP1 | ZIP16 | PIZ => false,
            PXR24 | B44 | B44A | DWAB(_) | DWAA(_) => true,
        }
    }

    /// Most compression methods will reconstruct the exact pixel bytes,
    /// but some might replace NaN with zeroes.
    pub fn supports_nan(self) -> bool {
        use self::Compression::*;
        match self {
            B44 | B44A | DWAB(_) | DWAA(_) => false, // TODO dwa might support it?
            _ => true
        }
    }

}
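
// Illustrative sketch (added for clarity, not part of the original source):
// the property methods above encode the per-method table from the OpenEXR format,
// e.g. the block height of each compression method and which methods may lose data.
#[cfg(test)]
mod compression_properties_example {
    use super::*;

    #[test]
    fn block_sizes_and_losslessness(){
        assert_eq!(Compression::ZIP1.scan_lines_per_block(), 1);
        assert_eq!(Compression::ZIP16.scan_lines_per_block(), 16);
        assert_eq!(Compression::PIZ.scan_lines_per_block(), 32);

        assert!(Compression::RLE.supports_deep_data());
        assert!(!Compression::PIZ.supports_deep_data());

        // PXR24 discards the least significant bits of f32 samples but keeps f16 and u32 exact
        assert!(Compression::PXR24.is_lossless_for(SampleType::F16));
        assert!(!Compression::PXR24.is_lossless_for(SampleType::F32));
        assert!(Compression::PXR24.may_loose_data());
    }
}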


// see https://github.com/AcademySoftwareFoundation/openexr/blob/6a9f8af6e89547bcd370ae3cec2b12849eee0b54/OpenEXR/IlmImf/ImfMisc.cpp#L1456-L1541

#[allow(unused)] // allows the extra parameters to be unused
fn convert_current_to_little_endian(mut bytes: ByteVec, channels: &ChannelList, rectangle: IntegerBounds) -> ByteVec {
    #[cfg(target_endian = "big")]
    reverse_block_endianness(&mut bytes, channels, rectangle);

    bytes
}

#[allow(unused)] // allows the extra parameters to be unused
fn convert_little_endian_to_current(mut bytes: ByteVec, channels: &ChannelList, rectangle: IntegerBounds) -> ByteVec {
    #[cfg(target_endian = "big")]
    reverse_block_endianness(&mut bytes, channels, rectangle);

    bytes
}

#[allow(unused)] // unused when on a little endian system
fn reverse_block_endianness(bytes: &mut [u8], channels: &ChannelList, rectangle: IntegerBounds){
    let mut remaining_bytes: &mut [u8] = bytes;

    for y in rectangle.position.y() .. rectangle.end().y() {
        for channel in &channels.list {
            let line_is_subsampled = mod_p(y, usize_to_i32(channel.sampling.y())) != 0;
            if line_is_subsampled { continue; }

            let sample_count = rectangle.size.width() / channel.sampling.x();

            match channel.sample_type {
                SampleType::F16 => remaining_bytes = chomp_convert_n::<f16>(reverse_2_bytes, remaining_bytes, sample_count),
                SampleType::F32 => remaining_bytes = chomp_convert_n::<f32>(reverse_4_bytes, remaining_bytes, sample_count),
                SampleType::U32 => remaining_bytes = chomp_convert_n::<u32>(reverse_4_bytes, remaining_bytes, sample_count),
            }
        }
    }

    #[inline]
    fn chomp_convert_n<T>(convert_single_value: fn(&mut [u8]), mut bytes: &mut [u8], count: usize) -> &mut [u8] {
        let type_size = size_of::<T>();
        let (line_bytes, rest) = bytes.split_at_mut(count * type_size);
        let value_byte_chunks = line_bytes.chunks_exact_mut(type_size);

        for value_bytes in value_byte_chunks {
            convert_single_value(value_bytes);
        }

        rest
    }

    debug_assert!(remaining_bytes.is_empty(), "not all bytes were converted to little endian");
}

#[inline]
fn reverse_2_bytes(bytes: &mut [u8]){
    // this code seems like it could be optimized easily by the compiler
    let two_bytes: [u8; 2] = bytes.try_into().expect("invalid byte count");
    bytes.copy_from_slice(&[two_bytes[1], two_bytes[0]]);
}

#[inline]
fn reverse_4_bytes(bytes: &mut [u8]){
    let four_bytes: [u8; 4] = bytes.try_into().expect("invalid byte count");
    bytes.copy_from_slice(&[four_bytes[3], four_bytes[2], four_bytes[1], four_bytes[0]]);
}
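
// Illustrative example (added for clarity, not in the original source):
// the helpers above swap the byte order of a single value in place,
// turning a little-endian encoding into a big-endian one and vice versa.
#[cfg(test)]
mod byte_reversal_example {
    use super::{reverse_2_bytes, reverse_4_bytes};

    #[test]
    fn reverses_in_place(){
        let mut two = [0x12, 0x34];
        reverse_2_bytes(&mut two);
        assert_eq!(two, [0x34, 0x12]);

        let mut four = [0x12, 0x34, 0x56, 0x78];
        reverse_4_bytes(&mut four);
        assert_eq!(four, [0x78, 0x56, 0x34, 0x12]);
    }
}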

#[inline]
fn div_p(x: i32, y: i32) -> i32 {
    if x >= 0 {
        if y >= 0 { x / y }
        else { -(x / -y) }
    }
    else {
        if y >= 0 { -((y-1-x) / y) }
        else { (-y-1-x) / -y }
    }
}

#[inline]
fn mod_p(x: i32, y: i32) -> i32 {
    x - y * div_p(x, y)
}
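
// Illustrative example (added for clarity, not in the original source):
// `div_p` rounds towards negative infinity instead of towards zero, so `mod_p`
// is never negative for a positive divisor. This is what makes the subsampling
// check above (`mod_p(y, sampling) != 0`) work for negative window coordinates.
#[cfg(test)]
mod floor_division_example {
    use super::{div_p, mod_p};

    #[test]
    fn rounds_towards_negative_infinity(){
        assert_eq!(div_p(7, 2), 3);
        assert_eq!(mod_p(7, 2), 1);

        // plain `/` would yield -1 and `%` would yield -1 here
        assert_eq!(div_p(-3, 2), -2);
        assert_eq!(mod_p(-3, 2), 1);
    }
}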

/// A collection of functions used to prepare data for compression.
mod optimize_bytes {

    /// Integrate over all differences to the previous value in order to reconstruct sample values.
    pub fn differences_to_samples(buffer: &mut [u8]) {
        // The naive implementation is very simple:
        //
        // for index in 1..buffer.len() {
        //     buffer[index] = (buffer[index - 1] as i32 + buffer[index] as i32 - 128) as u8;
        // }
        //
        // But we process elements in pairs to take advantage of instruction-level parallelism.
        // When computations within a pair do not depend on each other, they can be processed in parallel.
        // Since this function is responsible for a very large chunk of execution time,
        // this tweak alone improves decoding performance of RLE images by 20%.
        if let Some(first) = buffer.get(0) {
            let mut previous = *first as i16;
            for chunk in &mut buffer[1..].chunks_exact_mut(2) {
                // no bounds checks here due to indices and chunk size being constant
                let diff0 = chunk[0] as i16;
                let diff1 = chunk[1] as i16;
                // these two computations do not depend on each other, unlike in the naive version,
                // so they can be executed by the CPU in parallel via instruction-level parallelism
                let sample0 = (previous + diff0 - 128) as u8;
                let sample1 = (previous + diff0 + diff1 - 128 * 2) as u8;
                chunk[0] = sample0;
                chunk[1] = sample1;
                previous = sample1 as i16;
            }
            // handle the remaining element at the end not processed by the loop over pairs, if present
            for elem in &mut buffer[1..].chunks_exact_mut(2).into_remainder().iter_mut() {
                let sample = (previous + *elem as i16 - 128) as u8;
                *elem = sample;
                previous = sample as i16;
            }
        }
    }

    /// Derive over all values in order to produce differences to the previous value.
    pub fn samples_to_differences(buffer: &mut [u8]){
        // The naive version:
        //
        // for index in (1..buffer.len()).rev() {
        //     buffer[index] = (buffer[index] as i32 - buffer[index - 1] as i32 + 128) as u8;
        // }
        //
        // But we process elements in batches to take advantage of autovectorization.
        // If the target platform has no vector instructions (e.g. 32-bit ARM without `-C target-cpu=native`)
        // this will instead take advantage of instruction-level parallelism.
        if let Some(first) = buffer.get(0) {
            let mut previous = *first as i16;
            // Chunk size is 16 because we process bytes (8 bits),
            // and 8*16 = 128 bits is the size of a typical SIMD register.
            // Even WASM has 128-bit SIMD registers.
            for chunk in &mut buffer[1..].chunks_exact_mut(16) {
                // no bounds checks here due to indices and chunk size being constant
                let sample0 = chunk[0] as i16;
                let sample1 = chunk[1] as i16;
                let sample2 = chunk[2] as i16;
                let sample3 = chunk[3] as i16;
                let sample4 = chunk[4] as i16;
                let sample5 = chunk[5] as i16;
                let sample6 = chunk[6] as i16;
                let sample7 = chunk[7] as i16;
                let sample8 = chunk[8] as i16;
                let sample9 = chunk[9] as i16;
                let sample10 = chunk[10] as i16;
                let sample11 = chunk[11] as i16;
                let sample12 = chunk[12] as i16;
                let sample13 = chunk[13] as i16;
                let sample14 = chunk[14] as i16;
                let sample15 = chunk[15] as i16;
                // Unlike in decoding, the computations here are truly independent from each other,
                // which enables the compiler to vectorize this loop.
                // Even if the target platform has no vector instructions, this costs nothing:
                // using more parallelism doesn't imply doing more work,
                // and we're not really limited in how wide we can go.
                chunk[0] = (sample0 - previous + 128) as u8;
                chunk[1] = (sample1 - sample0 + 128) as u8;
                chunk[2] = (sample2 - sample1 + 128) as u8;
                chunk[3] = (sample3 - sample2 + 128) as u8;
                chunk[4] = (sample4 - sample3 + 128) as u8;
                chunk[5] = (sample5 - sample4 + 128) as u8;
                chunk[6] = (sample6 - sample5 + 128) as u8;
                chunk[7] = (sample7 - sample6 + 128) as u8;
                chunk[8] = (sample8 - sample7 + 128) as u8;
                chunk[9] = (sample9 - sample8 + 128) as u8;
                chunk[10] = (sample10 - sample9 + 128) as u8;
                chunk[11] = (sample11 - sample10 + 128) as u8;
                chunk[12] = (sample12 - sample11 + 128) as u8;
                chunk[13] = (sample13 - sample12 + 128) as u8;
                chunk[14] = (sample14 - sample13 + 128) as u8;
                chunk[15] = (sample15 - sample14 + 128) as u8;
                previous = sample15;
            }
            // Handle the remaining elements at the end not processed by the loop over batches, if present.
            // This is what the iterator-based version of this function would look like without vectorization.
            for elem in &mut buffer[1..].chunks_exact_mut(16).into_remainder().iter_mut() {
                let diff = (*elem as i16 - previous + 128) as u8;
                previous = *elem as i16;
                *elem = diff;
            }
        }
    }

    use std::cell::Cell;
    thread_local! {
        // A buffer that is reused between invocations of interleaving and deinterleaving.
        // Allocating memory is cheap, but zeroing or otherwise initializing it is not.
        // Doing it hundreds of times (once per block) would be expensive.
        // This optimization brings down the time spent in interleaving from 15% to 5%.
        static SCRATCH_SPACE: Cell<Vec<u8>> = Cell::new(Vec::new());
    }

    fn with_reused_buffer<F>(length: usize, mut func: F) where F: FnMut(&mut [u8]) {
        SCRATCH_SPACE.with(|scratch_space| {
            // reuse a buffer if we've already initialized one
            let mut buffer = scratch_space.take();
            if buffer.len() < length {
                // Efficiently create a zeroed Vec by requesting zeroed memory from the OS.
                // This is slightly faster than the `memcpy()` plus `memset()` that would happen otherwise,
                // but it is not a big deal either way since this is not a hot codepath.
                buffer = vec![0u8; length];
            }

            // call the function
            func(&mut buffer[..length]);

            // save the internal buffer for reuse
            scratch_space.set(buffer);
        });
    }

    /// Interleave the bytes such that the second half of the array is every other byte.
    pub fn interleave_byte_blocks(separated: &mut [u8]) {
        with_reused_buffer(separated.len(), |interleaved| {

            // Split the two halves that we are going to interleave.
            let (first_half, second_half) = separated.split_at((separated.len() + 1) / 2);
            // The first half can be 1 byte longer than the second if the length of the input is odd,
            // but the loop below only processes numbers in pairs.
            // To handle it, preserve the last element of the first slice, to be handled after the loop.
            let first_half_last = first_half.last();
            // Truncate the first half to match the length of the second one; more optimizer-friendly
            let first_half_iter = &first_half[..second_half.len()];

            // Main loop that performs the interleaving
            for ((first, second), interleaved) in first_half_iter.iter().zip(second_half.iter())
                .zip(interleaved.chunks_exact_mut(2)) {
                // The length of each chunk is known to be 2 at compile time,
                // and each index is also a constant.
                // This allows the compiler to remove the bounds checks.
                interleaved[0] = *first;
                interleaved[1] = *second;
            }

            // If the length of the slice was odd, restore the last element of the first half that we saved
            if interleaved.len() % 2 == 1 {
                if let Some(value) = first_half_last {
                    // we can unwrap() here because we just checked that the length is non-zero:
                    // `% 2 == 1` will fail for zero
                    *interleaved.last_mut().unwrap() = *value;
                }
            }

            // write out the results
            separated.copy_from_slice(&interleaved);
        });
    }

    /// Separate the bytes such that the second half contains every other byte.
    /// This performs deinterleaving - the inverse of interleaving.
    pub fn separate_bytes_fragments(source: &mut [u8]) {
        with_reused_buffer(source.len(), |separated| {

            // Split the two halves that will receive the deinterleaved bytes.
            let (first_half, second_half) = separated.split_at_mut((source.len() + 1) / 2);
            // The first half can be 1 byte longer than the second if the length of the input is odd,
            // but the loop below only processes numbers in pairs.
            // To handle it, preserve the last element of the input, to be handled after the loop.
            let last = source.last();
            let first_half_iter = &mut first_half[..second_half.len()];

            // Main loop that performs the deinterleaving
            for ((first, second), interleaved) in first_half_iter.iter_mut().zip(second_half.iter_mut())
                .zip(source.chunks_exact(2)) {
                // The length of each chunk is known to be 2 at compile time,
                // and each index is also a constant.
                // This allows the compiler to remove the bounds checks.
                *first = interleaved[0];
                *second = interleaved[1];
            }

            // If the length of the slice was odd, restore the last element of the input that we saved
            if source.len() % 2 == 1 {
                if let Some(value) = last {
                    // we can unwrap() here because we just checked that the length is non-zero:
                    // `% 2 == 1` will fail for zero
                    *first_half.last_mut().unwrap() = *value;
                }
            }

            // write out the results
            source.copy_from_slice(&separated);
        });
    }


    #[cfg(test)]
    pub mod test {

        #[test]
        fn roundtrip_interleave(){
            let source = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ];
            let mut modified = source.clone();

            super::separate_bytes_fragments(&mut modified);
            super::interleave_byte_blocks(&mut modified);

            assert_eq!(source, modified);
        }

        #[test]
        fn roundtrip_derive(){
            let source = vec![ 0, 1, 2, 7, 4, 5, 6, 7, 13, 9, 10 ];
            let mut modified = source.clone();

            super::samples_to_differences(&mut modified);
            super::differences_to_samples(&mut modified);

            assert_eq!(source, modified);
        }
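
        // Illustrative examples (added for clarity, not in the original test suite):
        // they pin down the concrete byte layout produced by the helpers above.

        #[test]
        fn derive_produces_offset_deltas(){
            // a rising ramp turns into a constant run of `1 + 128`,
            // which compresses much better with RLE or ZIP
            let mut buffer = vec![ 0u8, 1, 2, 3 ];
            super::samples_to_differences(&mut buffer);
            assert_eq!(buffer, vec![ 0, 129, 129, 129 ]);
        }

        #[test]
        fn separate_moves_every_other_byte_to_the_back(){
            let mut buffer = vec![ 0u8, 1, 2, 3, 4 ];
            super::separate_bytes_fragments(&mut buffer);
            assert_eq!(buffer, vec![ 0, 2, 4, 1, 3 ]);
        }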

    }
}


#[cfg(test)]
pub mod test {
    use super::*;
    use crate::meta::attribute::ChannelDescription;
    use crate::block::samples::IntoNativeSample;

    #[test]
    fn roundtrip_endianness_mixed_channels(){
        let a32 = ChannelDescription::new("A", SampleType::F32, true);
        let y16 = ChannelDescription::new("Y", SampleType::F16, true);
        let channels = ChannelList::new(smallvec![ a32, y16 ]);

        let data = vec![
            23582740683_f32.to_ne_bytes().as_slice(),
            35827420683_f32.to_ne_bytes().as_slice(),
            27406832358_f32.to_f16().to_ne_bytes().as_slice(),
            74062358283_f32.to_f16().to_ne_bytes().as_slice(),

            52582740683_f32.to_ne_bytes().as_slice(),
            45827420683_f32.to_ne_bytes().as_slice(),
            15406832358_f32.to_f16().to_ne_bytes().as_slice(),
            65062358283_f32.to_f16().to_ne_bytes().as_slice(),
        ].into_iter().flatten().map(|x| *x).collect();

        roundtrip_convert_endianness(
            data, &channels,
            IntegerBounds::from_dimensions((2, 2))
        );
    }

    fn roundtrip_convert_endianness(
        current_endian: ByteVec, channels: &ChannelList, rectangle: IntegerBounds
    ){
        let little_endian = convert_current_to_little_endian(
            current_endian.clone(), channels, rectangle
        );

        let current_endian_decoded = convert_little_endian_to_current(
            little_endian.clone(), channels, rectangle
        );

        assert_eq!(current_endian, current_endian_decoded, "endianness conversion failed");
    }
}