dxt.rs source code [crates/image-0.24.9/src/codecs/dxt.rs]

1	//! Decoding of DXT (S3TC) compression
2	//!
3	//! DXT is an image format that supports lossy compression
4	//!
5	//! # Related Links
6	//! <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.*
7	//!
8	//! Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9
10	use std::io::{self, Read, Seek, SeekFrom, Write};
11
12	use crate::color::ColorType;
13	use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind};
14	use crate::image::{self, ImageDecoder, ImageDecoderRect, ImageReadBuffer, Progress};
15
16	/// What version of DXT compression are we using?
17	/// Note that DXT2 and DXT4 are left away as they're
18	/// just DXT3 and DXT5 with premultiplied alpha
19	#[derive(Clone, Copy, Debug, PartialEq, Eq)]
20	pub enum DxtVariant {
21	/// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
22	/// compressed into an 8 byte block of DXT1 data
23	DXT1,
24	/// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
25	/// compressed into a 16 byte block of DXT3 data
26	DXT3,
27	/// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
28	/// compressed into a 16 byte block of DXT5 data
29	DXT5,
30	}
31
32	impl DxtVariant {
33	/// Returns the amount of bytes of raw image data
34	/// that is encoded in a single DXTn block
35	fn decoded_bytes_per_block(self) -> usize {
36	match self {
37	DxtVariant::DXT1 => `48`,
38	DxtVariant::DXT3 \| DxtVariant::DXT5 => `64`,
39	}
40	}
41
42	/// Returns the amount of bytes per block of encoded DXTn data
43	fn encoded_bytes_per_block(self) -> usize {
44	match self {
45	DxtVariant::DXT1 => `8`,
46	DxtVariant::DXT3 \| DxtVariant::DXT5 => `16`,
47	}
48	}
49
50	/// Returns the color type that is stored in this DXT variant
51	pub fn color_type(self) -> ColorType {
52	match self {
53	DxtVariant::DXT1 => ColorType::Rgb8,
54	DxtVariant::DXT3 \| DxtVariant::DXT5 => ColorType::Rgba8,
55	}
56	}
57	}
58
59	/// DXT decoder
60	pub struct DxtDecoder<R: Read> {
61	inner: R,
62	width_blocks: u32,
63	height_blocks: u32,
64	variant: DxtVariant,
65	row: u32,
66	}
67
68	impl<R: Read> DxtDecoder<R> {
69	/// Create a new DXT decoder that decodes from the stream ```r```.
70	/// As DXT is often stored as raw buffers with the width/height
71	/// somewhere else the width and height of the image need
72	/// to be passed in ```width``` and ```height```, as well as the
73	/// DXT variant in ```variant```.
74	/// width and height are required to be powers of 2 and at least 4.
75	/// otherwise an error will be returned
76	pub fn new(
77	r: R,
78	width: u32,
79	height: u32,
80	variant: DxtVariant,
81	) -> Result<DxtDecoder<R>, ImageError> {
82	if width % `4` != `0` \|\| height % `4` != `0` {
83	// TODO: this is actually a bit of a weird case. We could return `DecodingError` but
84	// it's not really the format that is wrong However, the encoder should surely return
85	// `EncodingError` so it would be the logical choice for symmetry.
86	return Err(ImageError::Parameter(ParameterError::from_kind(
87	ParameterErrorKind::DimensionMismatch,
88	)));
89	}
90	let width_blocks = width / `4`;
91	let height_blocks = height / `4`;
92	Ok(DxtDecoder {
93	inner: r,
94	width_blocks,
95	height_blocks,
96	variant,
97	row: `0`,
98	})
99	}
100
101	fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
102	assert_eq!(
103	u64::try_from(buf.len()),
104	Ok(
105	#[allow(deprecated)]
106	self.scanline_bytes()
107	)
108	);
109
110	let mut src =
111	vec![`0u8`; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
112	self.inner.read_exact(&mut src)?;
113	match self.variant {
114	DxtVariant::DXT1 => decode_dxt1_row(&src, buf),
115	DxtVariant::DXT3 => decode_dxt3_row(&src, buf),
116	DxtVariant::DXT5 => decode_dxt5_row(&src, buf),
117	}
118	self.row += `1`;
119	Ok(buf.len())
120	}
121	}
122
123	// Note that, due to the way that DXT compression works, a scanline is considered to consist out of
124	// 4 lines of pixels.
125	impl<'a, R: 'a + Read> ImageDecoder<'a> for DxtDecoder<R> {
126	type Reader = DxtReader<R>;
127
128	fn dimensions(&self) -> (u32, u32) {
129	(self.width_blocks * `4`, self.height_blocks * `4`)
130	}
131
132	fn color_type(&self) -> ColorType {
133	self.variant.color_type()
134	}
135
136	fn scanline_bytes(&self) -> u64 {
137	self.variant.decoded_bytes_per_block() as u64 * u64::from(self.width_blocks)
138	}
139
140	fn into_reader(self) -> ImageResult<Self::Reader> {
141	Ok(DxtReader {
142	buffer: ImageReadBuffer::new(
143	#[allow(deprecated)]
144	self.scanline_bytes(),
145	self.total_bytes(),
146	),
147	decoder: self,
148	})
149	}
150
151	fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> {
152	assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes()));
153
154	#[allow(deprecated)]
155	for chunk in buf.chunks_mut(self.scanline_bytes().max(`1`) as usize) {
156	self.read_scanline(chunk)?;
157	}
158	Ok(())
159	}
160	}
161
162	impl<'a, R: 'a + Read + Seek> ImageDecoderRect<'a> for DxtDecoder<R> {
163	fn read_rect_with_progress<F: Fn(Progress)>(
164	&mut self,
165	x: u32,
166	y: u32,
167	width: u32,
168	height: u32,
169	buf: &mut [u8],
170	progress_callback: F,
171	) -> ImageResult<()> {
172	let encoded_scanline_bytes =
173	self.variant.encoded_bytes_per_block() as u64 * u64::from(self.width_blocks);
174
175	let start = self.inner.stream_position()?;
176	image::load_rect(
177	x,
178	y,
179	width,
180	height,
181	buf,
182	progress_callback,
183	self,
184	\|s, scanline\| {
185	s.inner
186	.seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
187	Ok(())
188	},
189	\|s, buf\| s.read_scanline(buf).map(\|_\| ()),
190	)?;
191	self.inner.seek(SeekFrom::Start(start))?;
192	Ok(())
193	}
194	}
195
196	/// DXT reader
197	pub struct DxtReader<R: Read> {
198	buffer: ImageReadBuffer,
199	decoder: DxtDecoder<R>,
200	}
201
202	impl<R: Read> Read for DxtReader<R> {
203	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
204	let decoder: &mut DxtDecoder = &mut self.decoder;
205	self.buffer.read(buf, \|buf: &mut [u8]\| decoder.read_scanline(buf))
206	}
207	}
208
209	/// DXT encoder
210	pub struct DxtEncoder<W: Write> {
211	w: W,
212	}
213
214	impl<W: Write> DxtEncoder<W> {
215	/// Create a new encoder that writes its output to ```w```
216	pub fn new(w: W) -> DxtEncoder<W> {
217	DxtEncoder { w }
218	}
219
220	/// Encodes the image data ```data```
221	/// that has dimensions ```width``` and ```height```
222	/// in ```DxtVariant``` ```variant```
223	/// data is assumed to be in variant.color_type()
224	pub fn encode(
225	mut self,
226	data: &[u8],
227	width: u32,
228	height: u32,
229	variant: DxtVariant,
230	) -> ImageResult<()> {
231	if width % `4` != `0` \|\| height % `4` != `0` {
232	// TODO: this is not very idiomatic yet. Should return an EncodingError.
233	return Err(ImageError::Parameter(ParameterError::from_kind(
234	ParameterErrorKind::DimensionMismatch,
235	)));
236	}
237	let width_blocks = width / `4`;
238	let height_blocks = height / `4`;
239
240	let stride = variant.decoded_bytes_per_block();
241
242	assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
243
244	for chunk in data.chunks(width_blocks as usize * stride) {
245	let data = match variant {
246	DxtVariant::DXT1 => encode_dxt1_row(chunk),
247	DxtVariant::DXT3 => encode_dxt3_row(chunk),
248	DxtVariant::DXT5 => encode_dxt5_row(chunk),
249	};
250	self.w.write_all(&data)?;
251	}
252	Ok(())
253	}
254	}
255
256	/**
257	* Actual encoding/decoding logic below.
258	*/
259	use std::mem::swap;
260
261	type Rgb = [u8; `3`];
262
263	/// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
264	/// mapping is done so min/max range values are preserved. So for 5-bit
265	/// values 0x00 -> 0x00 and 0x1F -> 0xFF
266	fn enc565_decode(value: u16) -> Rgb {
267	let red: u16 = (value >> `11`) & `0x1F`;
268	let green: u16 = (value >> `5`) & `0x3F`;
269	let blue: u16 = (value) & `0x1F`;
270	[
271	(red * `0xFF` / `0x1F`) as u8,
272	(green * `0xFF` / `0x3F`) as u8,
273	(blue * `0xFF` / `0x1F`) as u8,
274	]
275	}
276
277	/// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
278	/// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
279	fn enc565_encode(rgb: Rgb) -> u16 {
280	let red: u16 = (u16::from(rgb[`0`]) * `0x1F` + `0x7E`) / `0xFF`;
281	let green: u16 = (u16::from(rgb[`1`]) * `0x3F` + `0x7E`) / `0xFF`;
282	let blue: u16 = (u16::from(rgb[`2`]) * `0x1F` + `0x7E`) / `0xFF`;
283	(red << `11`) \| (green << `5`) \| blue
284	}
285
286	/// utility function: squares a value
287	fn square(a: i32) -> i32 {
288	a * a
289	}
290
291	/// returns the squared error between two RGB values
292	fn diff(a: Rgb, b: Rgb) -> i32 {
293	square(i32::from(a[`0`]) - i32::from(b[`0`]))
294	+ square(i32::from(a[`1`]) - i32::from(b[`1`]))
295	+ square(i32::from(a[`2`]) - i32::from(b[`2`]))
296	}
297
298	/*
299	* Functions for decoding DXT compression
300	*/
301
302	/// Constructs the DXT5 alpha lookup table from the two alpha entries
303	/// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
304	/// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
305	fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; `8`] {
306	let mut table: [u8; 8] = [alpha0, alpha1, `0`, `0`, `0`, `0`, `0`, `0xFF`];
307	if alpha0 > alpha1 {
308	for i: u16 in `2`..`8u16` {
309	table[i as usize] =
310	(((`8` - i) * u16::from(alpha0) + (i - `1`) * u16::from(alpha1)) / `7`) as u8;
311	}
312	} else {
313	for i: u16 in `2`..`6u16` {
314	table[i as usize] =
315	(((`6` - i) * u16::from(alpha0) + (i - `1`) * u16::from(alpha1)) / `5`) as u8;
316	}
317	}
318	table
319	}
320
321	/// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
322	/// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
323	fn decode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) {
324	// sanity checks, also enable the compiler to elide all following bound checks
325	assert!(source.len() == `8` && (dest.len() == `48` \|\| dest.len() == `64`));
326	// calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
327	let pitch = dest.len() / `16`;
328
329	// extract color data
330	let color0 = u16::from(source[`0`]) \| (u16::from(source[`1`]) << `8`);
331	let color1 = u16::from(source[`2`]) \| (u16::from(source[`3`]) << `8`);
332	let color_table = u32::from(source[`4`])
333	\| (u32::from(source[`5`]) << `8`)
334	\| (u32::from(source[`6`]) << `16`)
335	\| (u32::from(source[`7`]) << `24`);
336	// let color_table = source[4..8].iter().rev().fold(0, \|t, &b\| (t << 8) \| b as u32);
337
338	// decode the colors to rgb format
339	let mut colors = [[`0`; `3`]; `4`];
340	colors[`0`] = enc565_decode(color0);
341	colors[`1`] = enc565_decode(color1);
342
343	// determine color interpolation method
344	if color0 > color1 \|\| !is_dxt1 {
345	// linearly interpolate the other two color table entries
346	for i in `0`..`3` {
347	colors[`2`][i] = ((u16::from(colors[`0`][i]) * `2` + u16::from(colors[`1`][i]) + `1`) / `3`) as u8;
348	colors[`3`][i] = ((u16::from(colors[`0`][i]) + u16::from(colors[`1`][i]) * `2` + `1`) / `3`) as u8;
349	}
350	} else {
351	// linearly interpolate one other entry, keep the other at 0
352	for i in `0`..`3` {
353	colors[`2`][i] = ((u16::from(colors[`0`][i]) + u16::from(colors[`1`][i]) + `1`) / `2`) as u8;
354	}
355	}
356
357	// serialize the result. Every color is determined by looking up
358	// two bits in color_table which identify which color to actually pick from the 4 possible colors
359	for i in `0`..`16` {
360	dest[i * pitch..i * pitch + `3`]
361	.copy_from_slice(&colors[(color_table >> (i * `2`)) as usize & `3`]);
362	}
363	}
364
365	/// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
366	fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
367	assert!(source.len() == `16` && dest.len() == `64`);
368
369	// extract alpha index table (stored as little endian 64-bit value)
370	let alpha_table: u64 = source[`2`..`8`]
371	.iter()
372	.rev()
373	.fold(init:`0`, \|t: u64, &b: u8\| (t << `8`) \| u64::from(b));
374
375	// alhpa level decode
376	let alphas: [u8; 8] = alpha_table_dxt5(alpha0:source[`0`], alpha1:source[`1`]);
377
378	// serialize alpha
379	for i: usize in `0`..`16` {
380	dest[i * `4` + `3`] = alphas[(alpha_table >> (i * `3`)) as usize & `7`];
381	}
382
383	// handle colors
384	decode_dxt_colors(&source[`8`..`16`], dest, is_dxt1:`false`);
385	}
386
387	/// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
388	fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
389	assert!(source.len() == `16` && dest.len() == `64`);
390
391	// extract alpha index table (stored as little endian 64-bit value)
392	let alpha_table: u64 = source[`0`..`8`]
393	.iter()
394	.rev()
395	.fold(init:`0`, \|t: u64, &b: u8\| (t << `8`) \| u64::from(b));
396
397	// serialize alpha (stored as 4-bit values)
398	for i: usize in `0`..`16` {
399	dest[i * `4` + `3`] = ((alpha_table >> (i * `4`)) as u8 & `0xF`) * `0x11`;
400	}
401
402	// handle colors
403	decode_dxt_colors(&source[`8`..`16`], dest, is_dxt1:`false`);
404	}
405
406	/// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
407	fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
408	assert!(source.len() == `8` && dest.len() == `48`);
409	decode_dxt_colors(source, dest, is_dxt1:`true`);
410	}
411
412	/// Decode a row of DXT1 data to four rows of RGB data.
413	/// source.len() should be a multiple of 8, otherwise this panics.
414	fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
415	assert!(source.len() % `8` == `0`);
416	let block_count: usize = source.len() / `8`;
417	assert!(dest.len() >= block_count * `48`);
418
419	// contains the 16 decoded pixels per block
420	let mut decoded_block: [u8; 48] = [`0u8`; `48`];
421
422	for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:`8`).enumerate() {
423	decode_dxt1_block(source:encoded_block, &mut decoded_block);
424
425	// copy the values from the decoded block to linewise RGB layout
426	for line: usize in `0`..`4` {
427	let offset: usize = (block_count * line + x) * `12`;
428	dest[offset..offset + `12`].copy_from_slice(&decoded_block[line * `12`..(line + `1`) * `12`]);
429	}
430	}
431	}
432
433	/// Decode a row of DXT3 data to four rows of RGBA data.
434	/// source.len() should be a multiple of 16, otherwise this panics.
435	fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
436	assert!(source.len() % `16` == `0`);
437	let block_count: usize = source.len() / `16`;
438	assert!(dest.len() >= block_count * `64`);
439
440	// contains the 16 decoded pixels per block
441	let mut decoded_block: [u8; 64] = [`0u8`; `64`];
442
443	for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:`16`).enumerate() {
444	decode_dxt3_block(source:encoded_block, &mut decoded_block);
445
446	// copy the values from the decoded block to linewise RGB layout
447	for line: usize in `0`..`4` {
448	let offset: usize = (block_count * line + x) * `16`;
449	dest[offset..offset + `16`].copy_from_slice(&decoded_block[line * `16`..(line + `1`) * `16`]);
450	}
451	}
452	}
453
454	/// Decode a row of DXT5 data to four rows of RGBA data.
455	/// source.len() should be a multiple of 16, otherwise this panics.
456	fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
457	assert!(source.len() % `16` == `0`);
458	let block_count: usize = source.len() / `16`;
459	assert!(dest.len() >= block_count * `64`);
460
461	// contains the 16 decoded pixels per block
462	let mut decoded_block: [u8; 64] = [`0u8`; `64`];
463
464	for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:`16`).enumerate() {
465	decode_dxt5_block(source:encoded_block, &mut decoded_block);
466
467	// copy the values from the decoded block to linewise RGB layout
468	for line: usize in `0`..`4` {
469	let offset: usize = (block_count * line + x) * `16`;
470	dest[offset..offset + `16`].copy_from_slice(&decoded_block[line * `16`..(line + `1`) * `16`]);
471	}
472	}
473	}
474
475	/*
476	* Functions for encoding DXT compression
477	*/
478
479	/// Tries to perform the color encoding part of dxt compression
480	/// the approach taken is simple, it picks unique combinations
481	/// of the colors present in the block, and attempts to encode the
482	/// block with each, picking the encoding that yields the least
483	/// squared error out of all of them.
484	///
485	/// This could probably be faster but is already reasonably fast
486	/// and a good reference impl to optimize others against.
487	///
488	/// Another way to perform this analysis would be to perform a
489	/// singular value decomposition of the different colors, and
490	/// then pick 2 points on this line as the base colors. But
491	/// this is still rather unwieldy math and has issues
492	/// with the 3-linear-colors-and-0 case, it's also worse
493	/// at conserving the original colors.
494	///
495	/// source: should be RGBAx16 or RGBx16 bytes of data,
496	/// dest 8 bytes of resulting encoded color data
497	fn encode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) {
498	// sanity checks and determine stride when parsing the source data
499	assert!((source.len() == `64` \|\| source.len() == `48`) && dest.len() == `8`);
500	let stride = source.len() / `16`;
501
502	// reference colors array
503	let mut colors = [[`0u8`; `3`]; `4`];
504
505	// Put the colors we're going to be processing in an array with pure RGB layout
506	// note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
507	let mut targets = [[`0u8`; `3`]; `16`];
508	for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
509	*d = [s[`0`], s[`1`], s[`2`]];
510	}
511
512	// roundtrip all colors through the r5g6b5 encoding
513	for rgb in &mut targets {
514	rgb = enc565_decode(enc565_encode(rgb));
515	}
516
517	// and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
518	let mut colorspace_ = [[`0u8`; `3`]; `16`];
519	let mut colorspace_len = `0`;
520	for color in &targets {
521	if !colorspace_[..colorspace_len].contains(color) {
522	colorspace_[colorspace_len] = *color;
523	colorspace_len += `1`;
524	}
525	}
526	let mut colorspace = &colorspace_[..colorspace_len];
527
528	// in case of slight gradients it can happen that there's only one entry left in the color table.
529	// as the resulting banding can be quite bad if we would just left the block at the closest
530	// encodable color, we have a special path here that tries to emulate the wanted color
531	// using the linear interpolation between gradients
532	if colorspace.len() == `1` {
533	// the base color we got from colorspace reduction
534	let ref_rgb = colorspace[`0`];
535	// the unreduced color in this block that's the furthest away from the actual block
536	let mut rgb = targets
537	.iter()
538	.cloned()
539	.max_by_key(\|rgb\| diff(*rgb, ref_rgb))
540	.unwrap();
541	// amplify differences by 2.5, which should push them to the next quantized value
542	// if possible without overshoot
543	for i in `0`..`3` {
544	rgb[i] =
545	((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * `5` / `2` + i16::from(ref_rgb[i])) as u8;
546	}
547
548	// roundtrip it through quantization
549	let encoded = enc565_encode(rgb);
550	let rgb = enc565_decode(encoded);
551
552	// in case this didn't land us a different color the best way to represent this field is
553	// as a single color block
554	if rgb == ref_rgb {
555	dest[`0`] = encoded as u8;
556	dest[`1`] = (encoded >> `8`) as u8;
557
558	for d in dest.iter_mut().take(`8`).skip(`2`) {
559	*d = `0`;
560	}
561	return;
562	}
563
564	// we did find a separate value: add it to the options so after one round of quantization
565	// we're done
566	colorspace_[`1`] = rgb;
567	colorspace = &colorspace_[..`2`];
568	}
569
570	// block quantization loop: we basically just try every possible combination, returning
571	// the combination with the least squared error
572	// stores the best candidate colors
573	let mut chosen_colors = [[`0`; `3`]; `4`];
574	// did this index table use the [0,0,0] variant
575	let mut chosen_use_0 = `false`;
576	// error calculated for the last entry
577	let mut chosen_error = `0xFFFF_FFFFu32`;
578
579	// loop through unique permutations of the colorspace, where c1 != c2
580	'search: for (i, &c1) in colorspace.iter().enumerate() {
581	colors[`0`] = c1;
582
583	for &c2 in &colorspace[`0`..i] {
584	colors[`1`] = c2;
585
586	if is_dxt1 {
587	// what's inside here is ran at most 120 times.
588	for use_0 in `0`..`2` {
589	// and 240 times here.
590
591	if use_0 != `0` {
592	// interpolate one color, set the other to 0
593	for i in `0`..`3` {
594	colors[`2`][i] =
595	((u16::from(colors[`0`][i]) + u16::from(colors[`1`][i]) + `1`) / `2`) as u8;
596	}
597	colors[`3`] = [`0`, `0`, `0`];
598	} else {
599	// interpolate to get 2 more colors
600	for i in `0`..`3` {
601	colors[`2`][i] =
602	((u16::from(colors[`0`][i]) * `2` + u16::from(colors[`1`][i]) + `1`) / `3`)
603	as u8;
604	colors[`3`][i] =
605	((u16::from(colors[`0`][i]) + u16::from(colors[`1`][i]) * `2` + `1`) / `3`)
606	as u8;
607	}
608	}
609
610	// calculate the total error if we were to quantize the block with these color combinations
611	// both these loops have statically known iteration counts and are well vectorizable
612	// note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
613	// pixel.
614	let total_error = targets
615	.iter()
616	.map(\|t\| colors.iter().map(\|c\| diff(c, t) as u32).min().unwrap())
617	.sum();
618
619	// update the match if we found a better one
620	if total_error < chosen_error {
621	chosen_colors = colors;
622	chosen_use_0 = use_0 != `0`;
623	chosen_error = total_error;
624
625	// if we've got a perfect or at most 1 LSB off match, we're done
626	if total_error < `4` {
627	break 'search;
628	}
629	}
630	}
631	} else {
632	// what's inside here is ran at most 120 times.
633
634	// interpolate to get 2 more colors
635	for i in `0`..`3` {
636	colors[`2`][i] =
637	((u16::from(colors[`0`][i]) * `2` + u16::from(colors[`1`][i]) + `1`) / `3`) as u8;
638	colors[`3`][i] =
639	((u16::from(colors[`0`][i]) + u16::from(colors[`1`][i]) * `2` + `1`) / `3`) as u8;
640	}
641
642	// calculate the total error if we were to quantize the block with these color combinations
643	// both these loops have statically known iteration counts and are well vectorizable
644	// note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
645	// pixel.
646	let total_error = targets
647	.iter()
648	.map(\|t\| colors.iter().map(\|c\| diff(c, t) as u32).min().unwrap())
649	.sum();
650
651	// update the match if we found a better one
652	if total_error < chosen_error {
653	chosen_colors = colors;
654	chosen_error = total_error;
655
656	// if we've got a perfect or at most 1 LSB off match, we're done
657	if total_error < `4` {
658	break 'search;
659	}
660	}
661	}
662	}
663	}
664
665	// calculate the final indices
666	// note that targets is already in reverse pixel order, to make the index computation easy.
667	let mut chosen_indices = `0u32`;
668	for t in &targets {
669	let (idx, _) = chosen_colors
670	.iter()
671	.enumerate()
672	.min_by_key(\|&(_, c)\| diff(c, t))
673	.unwrap();
674	chosen_indices = (chosen_indices << `2`) \| idx as u32;
675	}
676
677	// encode the colors
678	let mut color0 = enc565_encode(chosen_colors[`0`]);
679	let mut color1 = enc565_encode(chosen_colors[`1`]);
680
681	// determine encoding. Note that color0 == color1 is impossible at this point
682	if is_dxt1 {
683	if color0 > color1 {
684	if chosen_use_0 {
685	swap(&mut color0, &mut color1);
686	// Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
687	let filter = (chosen_indices & `0xAAAA_AAAA`) >> `1`;
688	chosen_indices ^= filter ^ `0x5555_5555`;
689	}
690	} else if !chosen_use_0 {
691	swap(&mut color0, &mut color1);
692	// Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
693	chosen_indices ^= `0x5555_5555`;
694	}
695	}
696
697	// encode everything.
698	dest[`0`] = color0 as u8;
699	dest[`1`] = (color0 >> `8`) as u8;
700	dest[`2`] = color1 as u8;
701	dest[`3`] = (color1 >> `8`) as u8;
702	for i in `0`..`4` {
703	dest[i + `4`] = (chosen_indices >> (i * `8`)) as u8;
704	}
705	}
706
707	/// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
708	/// where the alpha table they are indexed against is created by
709	/// calling alpha_table_dxt5(alpha0, alpha1)
710	/// returns the resulting error and alpha table
711	fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; `16`]) -> (i32, u64) {
712	// create a table for the given alpha ranges
713	let table: [u8; 8] = alpha_table_dxt5(alpha0, alpha1);
714	let mut indices: u64 = `0u64`;
715	let mut total_error: i32 = `0i32`;
716
717	// least error brute force search
718	for (i: usize, &a: u8) in alphas.iter().enumerate() {
719	let (index: usize, error: i32) = tableOption<(usize, i32)>
720	.iter()
721	.enumerate()
722	.map(\|(i: usize, &e: u8)\| (i, square(i32::from(e) - i32::from(a))))
723	.min_by_key(\|&(_, e: i32)\| e)
724	.unwrap();
725	total_error += error;
726	indices \|= (index as u64) << (i * `3`);
727	}
728
729	(total_error, indices)
730	}
731
732	/// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
733	fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
734	assert!(source.len() == `64` && dest.len() == `16`);
735
736	// perform dxt color encoding
737	encode_dxt_colors(source, &mut dest[`8`..`16`], `false`);
738
739	// copy out the alpha bytes
740	let mut alphas = [`0`; `16`];
741	for i in `0`..`16` {
742	alphas[i] = source[i * `4` + `3`];
743	}
744
745	// try both alpha compression methods, see which has the least error.
746	let alpha07 = alphas.iter().cloned().min().unwrap();
747	let alpha17 = alphas.iter().cloned().max().unwrap();
748	let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
749
750	// if all alphas are 0 or 255 it doesn't particularly matter what we do here.
751	let alpha05 = alphas
752	.iter()
753	.cloned()
754	.filter(\|&i\| i != `255`)
755	.max()
756	.unwrap_or(`255`);
757	let alpha15 = alphas
758	.iter()
759	.cloned()
760	.filter(\|&i\| i != `0`)
761	.min()
762	.unwrap_or(`0`);
763	let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
764
765	// pick the best one, encode the min/max values
766	let mut alpha_table = if error5 < error7 {
767	dest[`0`] = alpha05;
768	dest[`1`] = alpha15;
769	indices5
770	} else {
771	dest[`0`] = alpha07;
772	dest[`1`] = alpha17;
773	indices7
774	};
775
776	// encode the alphas
777	for byte in dest[`2`..`8`].iter_mut() {
778	*byte = alpha_table as u8;
779	alpha_table >>= `8`;
780	}
781	}
782
783	/// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
784	fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
785	assert!(source.len() == `64` && dest.len() == `16`);
786
787	// perform dxt color encoding
788	encode_dxt_colors(source, &mut dest[`8`..`16`], is_dxt1:`false`);
789
790	// DXT3 alpha compression is very simple, just round towards the nearest value
791
792	// index the alpha values into the 64bit alpha table
793	let mut alpha_table: u64 = `0u64`;
794	for i: usize in `0`..`16` {
795	let alpha: u64 = u64::from(source[i * `4` + `3`]);
796	let alpha: u64 = (alpha + `0x8`) / `0x11`;
797	alpha_table \|= alpha << (i * `4`);
798	}
799
800	// encode the alpha values
801	for byte: &mut u8 in &mut dest[`0`..`8`] {
802	*byte = alpha_table as u8;
803	alpha_table >>= `8`;
804	}
805	}
806
807	/// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
808	fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
809	assert!(source.len() == `48` && dest.len() == `8`);
810
811	// perform dxt color encoding
812	encode_dxt_colors(source, dest, is_dxt1:`true`);
813	}
814
815	/// Decode a row of DXT1 data to four rows of RGBA data.
816	/// source.len() should be a multiple of 8, otherwise this panics.
817	fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
818	assert!(source.len() % `48` == `0`);
819	let block_count: usize = source.len() / `48`;
820
821	let mut dest: Vec = vec![`0u8`; block_count * `8`];
822	// contains the 16 decoded pixels per block
823	let mut decoded_block: [u8; 48] = [`0u8`; `48`];
824
825	for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:`8`).enumerate() {
826	// copy the values from the decoded block to linewise RGB layout
827	for line: usize in `0`..`4` {
828	let offset: usize = (block_count * line + x) * `12`;
829	decoded_block[line * `12`..(line + `1`) * `12`].copy_from_slice(&source[offset..offset + `12`]);
830	}
831
832	encode_dxt1_block(&decoded_block, dest:encoded_block);
833	}
834	dest
835	}
836
837	/// Decode a row of DXT3 data to four rows of RGBA data.
838	/// source.len() should be a multiple of 16, otherwise this panics.
839	fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
840	assert!(source.len() % `64` == `0`);
841	let block_count: usize = source.len() / `64`;
842
843	let mut dest: Vec = vec![`0u8`; block_count * `16`];
844	// contains the 16 decoded pixels per block
845	let mut decoded_block: [u8; 64] = [`0u8`; `64`];
846
847	for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:`16`).enumerate() {
848	// copy the values from the decoded block to linewise RGB layout
849	for line: usize in `0`..`4` {
850	let offset: usize = (block_count * line + x) * `16`;
851	decoded_block[line * `16`..(line + `1`) * `16`].copy_from_slice(&source[offset..offset + `16`]);
852	}
853
854	encode_dxt3_block(&decoded_block, dest:encoded_block);
855	}
856	dest
857	}
858
859	/// Decode a row of DXT5 data to four rows of RGBA data.
860	/// source.len() should be a multiple of 16, otherwise this panics.
861	fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
862	assert!(source.len() % `64` == `0`);
863	let block_count: usize = source.len() / `64`;
864
865	let mut dest: Vec = vec![`0u8`; block_count * `16`];
866	// contains the 16 decoded pixels per block
867	let mut decoded_block: [u8; 64] = [`0u8`; `64`];
868
869	for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:`16`).enumerate() {
870	// copy the values from the decoded block to linewise RGB layout
871	for line: usize in `0`..`4` {
872	let offset: usize = (block_count * line + x) * `16`;
873	decoded_block[line * `16`..(line + `1`) * `16`].copy_from_slice(&source[offset..offset + `16`]);
874	}
875
876	encode_dxt5_block(&decoded_block, dest:encoded_block);
877	}
878	dest
879	}
880