1//! Decoding of DXT (S3TC) compression
2//!
3//! DXT is an image format that supports lossy compression
4//!
5//! # Related Links
6//! * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions.
7//!
8//! Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds
9
10use std::io::{self, Read, Seek, SeekFrom, Write};
11
12use crate::color::ColorType;
13use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind};
14use crate::image::{self, ImageDecoder, ImageDecoderRect, ImageReadBuffer, Progress};
15
16/// What version of DXT compression are we using?
17/// Note that DXT2 and DXT4 are left away as they're
18/// just DXT3 and DXT5 with premultiplied alpha
19#[derive(Clone, Copy, Debug, PartialEq, Eq)]
20pub enum DxtVariant {
21 /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is
22 /// compressed into an 8 byte block of DXT1 data
23 DXT1,
24 /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is
25 /// compressed into a 16 byte block of DXT3 data
26 DXT3,
27 /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is
28 /// compressed into a 16 byte block of DXT5 data
29 DXT5,
30}
31
32impl DxtVariant {
33 /// Returns the amount of bytes of raw image data
34 /// that is encoded in a single DXTn block
35 fn decoded_bytes_per_block(self) -> usize {
36 match self {
37 DxtVariant::DXT1 => 48,
38 DxtVariant::DXT3 | DxtVariant::DXT5 => 64,
39 }
40 }
41
42 /// Returns the amount of bytes per block of encoded DXTn data
43 fn encoded_bytes_per_block(self) -> usize {
44 match self {
45 DxtVariant::DXT1 => 8,
46 DxtVariant::DXT3 | DxtVariant::DXT5 => 16,
47 }
48 }
49
50 /// Returns the color type that is stored in this DXT variant
51 pub fn color_type(self) -> ColorType {
52 match self {
53 DxtVariant::DXT1 => ColorType::Rgb8,
54 DxtVariant::DXT3 | DxtVariant::DXT5 => ColorType::Rgba8,
55 }
56 }
57}
58
59/// DXT decoder
60pub struct DxtDecoder<R: Read> {
61 inner: R,
62 width_blocks: u32,
63 height_blocks: u32,
64 variant: DxtVariant,
65 row: u32,
66}
67
68impl<R: Read> DxtDecoder<R> {
69 /// Create a new DXT decoder that decodes from the stream ```r```.
70 /// As DXT is often stored as raw buffers with the width/height
71 /// somewhere else the width and height of the image need
72 /// to be passed in ```width``` and ```height```, as well as the
73 /// DXT variant in ```variant```.
74 /// width and height are required to be powers of 2 and at least 4.
75 /// otherwise an error will be returned
76 pub fn new(
77 r: R,
78 width: u32,
79 height: u32,
80 variant: DxtVariant,
81 ) -> Result<DxtDecoder<R>, ImageError> {
82 if width % 4 != 0 || height % 4 != 0 {
83 // TODO: this is actually a bit of a weird case. We could return `DecodingError` but
84 // it's not really the format that is wrong However, the encoder should surely return
85 // `EncodingError` so it would be the logical choice for symmetry.
86 return Err(ImageError::Parameter(ParameterError::from_kind(
87 ParameterErrorKind::DimensionMismatch,
88 )));
89 }
90 let width_blocks = width / 4;
91 let height_blocks = height / 4;
92 Ok(DxtDecoder {
93 inner: r,
94 width_blocks,
95 height_blocks,
96 variant,
97 row: 0,
98 })
99 }
100
101 fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> {
102 assert_eq!(
103 u64::try_from(buf.len()),
104 Ok(
105 #[allow(deprecated)]
106 self.scanline_bytes()
107 )
108 );
109
110 let mut src =
111 vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize];
112 self.inner.read_exact(&mut src)?;
113 match self.variant {
114 DxtVariant::DXT1 => decode_dxt1_row(&src, buf),
115 DxtVariant::DXT3 => decode_dxt3_row(&src, buf),
116 DxtVariant::DXT5 => decode_dxt5_row(&src, buf),
117 }
118 self.row += 1;
119 Ok(buf.len())
120 }
121}
122
123// Note that, due to the way that DXT compression works, a scanline is considered to consist out of
124// 4 lines of pixels.
125impl<'a, R: 'a + Read> ImageDecoder<'a> for DxtDecoder<R> {
126 type Reader = DxtReader<R>;
127
128 fn dimensions(&self) -> (u32, u32) {
129 (self.width_blocks * 4, self.height_blocks * 4)
130 }
131
132 fn color_type(&self) -> ColorType {
133 self.variant.color_type()
134 }
135
136 fn scanline_bytes(&self) -> u64 {
137 self.variant.decoded_bytes_per_block() as u64 * u64::from(self.width_blocks)
138 }
139
140 fn into_reader(self) -> ImageResult<Self::Reader> {
141 Ok(DxtReader {
142 buffer: ImageReadBuffer::new(
143 #[allow(deprecated)]
144 self.scanline_bytes(),
145 self.total_bytes(),
146 ),
147 decoder: self,
148 })
149 }
150
151 fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> {
152 assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes()));
153
154 #[allow(deprecated)]
155 for chunk in buf.chunks_mut(self.scanline_bytes().max(1) as usize) {
156 self.read_scanline(chunk)?;
157 }
158 Ok(())
159 }
160}
161
162impl<'a, R: 'a + Read + Seek> ImageDecoderRect<'a> for DxtDecoder<R> {
163 fn read_rect_with_progress<F: Fn(Progress)>(
164 &mut self,
165 x: u32,
166 y: u32,
167 width: u32,
168 height: u32,
169 buf: &mut [u8],
170 progress_callback: F,
171 ) -> ImageResult<()> {
172 let encoded_scanline_bytes =
173 self.variant.encoded_bytes_per_block() as u64 * u64::from(self.width_blocks);
174
175 let start = self.inner.stream_position()?;
176 image::load_rect(
177 x,
178 y,
179 width,
180 height,
181 buf,
182 progress_callback,
183 self,
184 |s, scanline| {
185 s.inner
186 .seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?;
187 Ok(())
188 },
189 |s, buf| s.read_scanline(buf).map(|_| ()),
190 )?;
191 self.inner.seek(SeekFrom::Start(start))?;
192 Ok(())
193 }
194}
195
196/// DXT reader
197pub struct DxtReader<R: Read> {
198 buffer: ImageReadBuffer,
199 decoder: DxtDecoder<R>,
200}
201
202impl<R: Read> Read for DxtReader<R> {
203 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
204 let decoder: &mut DxtDecoder = &mut self.decoder;
205 self.buffer.read(buf, |buf: &mut [u8]| decoder.read_scanline(buf))
206 }
207}
208
209/// DXT encoder
210pub struct DxtEncoder<W: Write> {
211 w: W,
212}
213
214impl<W: Write> DxtEncoder<W> {
215 /// Create a new encoder that writes its output to ```w```
216 pub fn new(w: W) -> DxtEncoder<W> {
217 DxtEncoder { w }
218 }
219
220 /// Encodes the image data ```data```
221 /// that has dimensions ```width``` and ```height```
222 /// in ```DxtVariant``` ```variant```
223 /// data is assumed to be in variant.color_type()
224 pub fn encode(
225 mut self,
226 data: &[u8],
227 width: u32,
228 height: u32,
229 variant: DxtVariant,
230 ) -> ImageResult<()> {
231 if width % 4 != 0 || height % 4 != 0 {
232 // TODO: this is not very idiomatic yet. Should return an EncodingError.
233 return Err(ImageError::Parameter(ParameterError::from_kind(
234 ParameterErrorKind::DimensionMismatch,
235 )));
236 }
237 let width_blocks = width / 4;
238 let height_blocks = height / 4;
239
240 let stride = variant.decoded_bytes_per_block();
241
242 assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride);
243
244 for chunk in data.chunks(width_blocks as usize * stride) {
245 let data = match variant {
246 DxtVariant::DXT1 => encode_dxt1_row(chunk),
247 DxtVariant::DXT3 => encode_dxt3_row(chunk),
248 DxtVariant::DXT5 => encode_dxt5_row(chunk),
249 };
250 self.w.write_all(&data)?;
251 }
252 Ok(())
253 }
254}
255
256/**
257 * Actual encoding/decoding logic below.
258 */
259use std::mem::swap;
260
261type Rgb = [u8; 3];
262
263/// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB
264/// mapping is done so min/max range values are preserved. So for 5-bit
265/// values 0x00 -> 0x00 and 0x1F -> 0xFF
266fn enc565_decode(value: u16) -> Rgb {
267 let red: u16 = (value >> 11) & 0x1F;
268 let green: u16 = (value >> 5) & 0x3F;
269 let blue: u16 = (value) & 0x1F;
270 [
271 (red * 0xFF / 0x1F) as u8,
272 (green * 0xFF / 0x3F) as u8,
273 (blue * 0xFF / 0x1F) as u8,
274 ]
275}
276
277/// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value
278/// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i
279fn enc565_encode(rgb: Rgb) -> u16 {
280 let red: u16 = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF;
281 let green: u16 = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF;
282 let blue: u16 = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF;
283 (red << 11) | (green << 5) | blue
284}
285
286/// utility function: squares a value
287fn square(a: i32) -> i32 {
288 a * a
289}
290
291/// returns the squared error between two RGB values
292fn diff(a: Rgb, b: Rgb) -> i32 {
293 square(i32::from(a[0]) - i32::from(b[0]))
294 + square(i32::from(a[1]) - i32::from(b[1]))
295 + square(i32::from(a[2]) - i32::from(b[2]))
296}
297
298/*
299 * Functions for decoding DXT compression
300 */
301
302/// Constructs the DXT5 alpha lookup table from the two alpha entries
303/// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1]
304/// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF]
305fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] {
306 let mut table: [u8; 8] = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF];
307 if alpha0 > alpha1 {
308 for i: u16 in 2..8u16 {
309 table[i as usize] =
310 (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8;
311 }
312 } else {
313 for i: u16 in 2..6u16 {
314 table[i as usize] =
315 (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8;
316 }
317 }
318 table
319}
320
321/// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block.
322/// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA)
323fn decode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) {
324 // sanity checks, also enable the compiler to elide all following bound checks
325 assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64));
326 // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA)
327 let pitch = dest.len() / 16;
328
329 // extract color data
330 let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8);
331 let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8);
332 let color_table = u32::from(source[4])
333 | (u32::from(source[5]) << 8)
334 | (u32::from(source[6]) << 16)
335 | (u32::from(source[7]) << 24);
336 // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32);
337
338 // decode the colors to rgb format
339 let mut colors = [[0; 3]; 4];
340 colors[0] = enc565_decode(color0);
341 colors[1] = enc565_decode(color1);
342
343 // determine color interpolation method
344 if color0 > color1 || !is_dxt1 {
345 // linearly interpolate the other two color table entries
346 for i in 0..3 {
347 colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
348 colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
349 }
350 } else {
351 // linearly interpolate one other entry, keep the other at 0
352 for i in 0..3 {
353 colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
354 }
355 }
356
357 // serialize the result. Every color is determined by looking up
358 // two bits in color_table which identify which color to actually pick from the 4 possible colors
359 for i in 0..16 {
360 dest[i * pitch..i * pitch + 3]
361 .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]);
362 }
363}
364
365/// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block
366fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) {
367 assert!(source.len() == 16 && dest.len() == 64);
368
369 // extract alpha index table (stored as little endian 64-bit value)
370 let alpha_table: u64 = source[2..8]
371 .iter()
372 .rev()
373 .fold(init:0, |t: u64, &b: u8| (t << 8) | u64::from(b));
374
375 // alhpa level decode
376 let alphas: [u8; 8] = alpha_table_dxt5(alpha0:source[0], alpha1:source[1]);
377
378 // serialize alpha
379 for i: usize in 0..16 {
380 dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7];
381 }
382
383 // handle colors
384 decode_dxt_colors(&source[8..16], dest, is_dxt1:false);
385}
386
387/// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block
388fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) {
389 assert!(source.len() == 16 && dest.len() == 64);
390
391 // extract alpha index table (stored as little endian 64-bit value)
392 let alpha_table: u64 = source[0..8]
393 .iter()
394 .rev()
395 .fold(init:0, |t: u64, &b: u8| (t << 8) | u64::from(b));
396
397 // serialize alpha (stored as 4-bit values)
398 for i: usize in 0..16 {
399 dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11;
400 }
401
402 // handle colors
403 decode_dxt_colors(&source[8..16], dest, is_dxt1:false);
404}
405
406/// Decodes a 8-byte bock of dxt5 data to a 16xRGB block
407fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) {
408 assert!(source.len() == 8 && dest.len() == 48);
409 decode_dxt_colors(source, dest, is_dxt1:true);
410}
411
412/// Decode a row of DXT1 data to four rows of RGB data.
413/// source.len() should be a multiple of 8, otherwise this panics.
414fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) {
415 assert!(source.len() % 8 == 0);
416 let block_count: usize = source.len() / 8;
417 assert!(dest.len() >= block_count * 48);
418
419 // contains the 16 decoded pixels per block
420 let mut decoded_block: [u8; 48] = [0u8; 48];
421
422 for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:8).enumerate() {
423 decode_dxt1_block(source:encoded_block, &mut decoded_block);
424
425 // copy the values from the decoded block to linewise RGB layout
426 for line: usize in 0..4 {
427 let offset: usize = (block_count * line + x) * 12;
428 dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]);
429 }
430 }
431}
432
433/// Decode a row of DXT3 data to four rows of RGBA data.
434/// source.len() should be a multiple of 16, otherwise this panics.
435fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) {
436 assert!(source.len() % 16 == 0);
437 let block_count: usize = source.len() / 16;
438 assert!(dest.len() >= block_count * 64);
439
440 // contains the 16 decoded pixels per block
441 let mut decoded_block: [u8; 64] = [0u8; 64];
442
443 for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:16).enumerate() {
444 decode_dxt3_block(source:encoded_block, &mut decoded_block);
445
446 // copy the values from the decoded block to linewise RGB layout
447 for line: usize in 0..4 {
448 let offset: usize = (block_count * line + x) * 16;
449 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
450 }
451 }
452}
453
454/// Decode a row of DXT5 data to four rows of RGBA data.
455/// source.len() should be a multiple of 16, otherwise this panics.
456fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) {
457 assert!(source.len() % 16 == 0);
458 let block_count: usize = source.len() / 16;
459 assert!(dest.len() >= block_count * 64);
460
461 // contains the 16 decoded pixels per block
462 let mut decoded_block: [u8; 64] = [0u8; 64];
463
464 for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:16).enumerate() {
465 decode_dxt5_block(source:encoded_block, &mut decoded_block);
466
467 // copy the values from the decoded block to linewise RGB layout
468 for line: usize in 0..4 {
469 let offset: usize = (block_count * line + x) * 16;
470 dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]);
471 }
472 }
473}
474
475/*
476 * Functions for encoding DXT compression
477 */
478
479/// Tries to perform the color encoding part of dxt compression
480/// the approach taken is simple, it picks unique combinations
481/// of the colors present in the block, and attempts to encode the
482/// block with each, picking the encoding that yields the least
483/// squared error out of all of them.
484///
485/// This could probably be faster but is already reasonably fast
486/// and a good reference impl to optimize others against.
487///
488/// Another way to perform this analysis would be to perform a
489/// singular value decomposition of the different colors, and
490/// then pick 2 points on this line as the base colors. But
491/// this is still rather unwieldy math and has issues
492/// with the 3-linear-colors-and-0 case, it's also worse
493/// at conserving the original colors.
494///
495/// source: should be RGBAx16 or RGBx16 bytes of data,
496/// dest 8 bytes of resulting encoded color data
497fn encode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) {
498 // sanity checks and determine stride when parsing the source data
499 assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
500 let stride = source.len() / 16;
501
502 // reference colors array
503 let mut colors = [[0u8; 3]; 4];
504
505 // Put the colors we're going to be processing in an array with pure RGB layout
506 // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
507 let mut targets = [[0u8; 3]; 16];
508 for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
509 *d = [s[0], s[1], s[2]];
510 }
511
512 // roundtrip all colors through the r5g6b5 encoding
513 for rgb in &mut targets {
514 *rgb = enc565_decode(enc565_encode(*rgb));
515 }
516
517 // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
518 let mut colorspace_ = [[0u8; 3]; 16];
519 let mut colorspace_len = 0;
520 for color in &targets {
521 if !colorspace_[..colorspace_len].contains(color) {
522 colorspace_[colorspace_len] = *color;
523 colorspace_len += 1;
524 }
525 }
526 let mut colorspace = &colorspace_[..colorspace_len];
527
528 // in case of slight gradients it can happen that there's only one entry left in the color table.
529 // as the resulting banding can be quite bad if we would just left the block at the closest
530 // encodable color, we have a special path here that tries to emulate the wanted color
531 // using the linear interpolation between gradients
532 if colorspace.len() == 1 {
533 // the base color we got from colorspace reduction
534 let ref_rgb = colorspace[0];
535 // the unreduced color in this block that's the furthest away from the actual block
536 let mut rgb = targets
537 .iter()
538 .cloned()
539 .max_by_key(|rgb| diff(*rgb, ref_rgb))
540 .unwrap();
541 // amplify differences by 2.5, which should push them to the next quantized value
542 // if possible without overshoot
543 for i in 0..3 {
544 rgb[i] =
545 ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
546 }
547
548 // roundtrip it through quantization
549 let encoded = enc565_encode(rgb);
550 let rgb = enc565_decode(encoded);
551
552 // in case this didn't land us a different color the best way to represent this field is
553 // as a single color block
554 if rgb == ref_rgb {
555 dest[0] = encoded as u8;
556 dest[1] = (encoded >> 8) as u8;
557
558 for d in dest.iter_mut().take(8).skip(2) {
559 *d = 0;
560 }
561 return;
562 }
563
564 // we did find a separate value: add it to the options so after one round of quantization
565 // we're done
566 colorspace_[1] = rgb;
567 colorspace = &colorspace_[..2];
568 }
569
570 // block quantization loop: we basically just try every possible combination, returning
571 // the combination with the least squared error
572 // stores the best candidate colors
573 let mut chosen_colors = [[0; 3]; 4];
574 // did this index table use the [0,0,0] variant
575 let mut chosen_use_0 = false;
576 // error calculated for the last entry
577 let mut chosen_error = 0xFFFF_FFFFu32;
578
579 // loop through unique permutations of the colorspace, where c1 != c2
580 'search: for (i, &c1) in colorspace.iter().enumerate() {
581 colors[0] = c1;
582
583 for &c2 in &colorspace[0..i] {
584 colors[1] = c2;
585
586 if is_dxt1 {
587 // what's inside here is ran at most 120 times.
588 for use_0 in 0..2 {
589 // and 240 times here.
590
591 if use_0 != 0 {
592 // interpolate one color, set the other to 0
593 for i in 0..3 {
594 colors[2][i] =
595 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
596 }
597 colors[3] = [0, 0, 0];
598 } else {
599 // interpolate to get 2 more colors
600 for i in 0..3 {
601 colors[2][i] =
602 ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3)
603 as u8;
604 colors[3][i] =
605 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3)
606 as u8;
607 }
608 }
609
610 // calculate the total error if we were to quantize the block with these color combinations
611 // both these loops have statically known iteration counts and are well vectorizable
612 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
613 // pixel.
614 let total_error = targets
615 .iter()
616 .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
617 .sum();
618
619 // update the match if we found a better one
620 if total_error < chosen_error {
621 chosen_colors = colors;
622 chosen_use_0 = use_0 != 0;
623 chosen_error = total_error;
624
625 // if we've got a perfect or at most 1 LSB off match, we're done
626 if total_error < 4 {
627 break 'search;
628 }
629 }
630 }
631 } else {
632 // what's inside here is ran at most 120 times.
633
634 // interpolate to get 2 more colors
635 for i in 0..3 {
636 colors[2][i] =
637 ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
638 colors[3][i] =
639 ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
640 }
641
642 // calculate the total error if we were to quantize the block with these color combinations
643 // both these loops have statically known iteration counts and are well vectorizable
644 // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
645 // pixel.
646 let total_error = targets
647 .iter()
648 .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
649 .sum();
650
651 // update the match if we found a better one
652 if total_error < chosen_error {
653 chosen_colors = colors;
654 chosen_error = total_error;
655
656 // if we've got a perfect or at most 1 LSB off match, we're done
657 if total_error < 4 {
658 break 'search;
659 }
660 }
661 }
662 }
663 }
664
665 // calculate the final indices
666 // note that targets is already in reverse pixel order, to make the index computation easy.
667 let mut chosen_indices = 0u32;
668 for t in &targets {
669 let (idx, _) = chosen_colors
670 .iter()
671 .enumerate()
672 .min_by_key(|&(_, c)| diff(*c, *t))
673 .unwrap();
674 chosen_indices = (chosen_indices << 2) | idx as u32;
675 }
676
677 // encode the colors
678 let mut color0 = enc565_encode(chosen_colors[0]);
679 let mut color1 = enc565_encode(chosen_colors[1]);
680
681 // determine encoding. Note that color0 == color1 is impossible at this point
682 if is_dxt1 {
683 if color0 > color1 {
684 if chosen_use_0 {
685 swap(&mut color0, &mut color1);
686 // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
687 let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
688 chosen_indices ^= filter ^ 0x5555_5555;
689 }
690 } else if !chosen_use_0 {
691 swap(&mut color0, &mut color1);
692 // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
693 chosen_indices ^= 0x5555_5555;
694 }
695 }
696
697 // encode everything.
698 dest[0] = color0 as u8;
699 dest[1] = (color0 >> 8) as u8;
700 dest[2] = color1 as u8;
701 dest[3] = (color1 >> 8) as u8;
702 for i in 0..4 {
703 dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
704 }
705}
706
707/// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table,
708/// where the alpha table they are indexed against is created by
709/// calling alpha_table_dxt5(alpha0, alpha1)
710/// returns the resulting error and alpha table
711fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) {
712 // create a table for the given alpha ranges
713 let table: [u8; 8] = alpha_table_dxt5(alpha0, alpha1);
714 let mut indices: u64 = 0u64;
715 let mut total_error: i32 = 0i32;
716
717 // least error brute force search
718 for (i: usize, &a: u8) in alphas.iter().enumerate() {
719 let (index: usize, error: i32) = tableOption<(usize, i32)>
720 .iter()
721 .enumerate()
722 .map(|(i: usize, &e: u8)| (i, square(i32::from(e) - i32::from(a))))
723 .min_by_key(|&(_, e: i32)| e)
724 .unwrap();
725 total_error += error;
726 indices |= (index as u64) << (i * 3);
727 }
728
729 (total_error, indices)
730}
731
732/// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block
733fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) {
734 assert!(source.len() == 64 && dest.len() == 16);
735
736 // perform dxt color encoding
737 encode_dxt_colors(source, &mut dest[8..16], false);
738
739 // copy out the alpha bytes
740 let mut alphas = [0; 16];
741 for i in 0..16 {
742 alphas[i] = source[i * 4 + 3];
743 }
744
745 // try both alpha compression methods, see which has the least error.
746 let alpha07 = alphas.iter().cloned().min().unwrap();
747 let alpha17 = alphas.iter().cloned().max().unwrap();
748 let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas);
749
750 // if all alphas are 0 or 255 it doesn't particularly matter what we do here.
751 let alpha05 = alphas
752 .iter()
753 .cloned()
754 .filter(|&i| i != 255)
755 .max()
756 .unwrap_or(255);
757 let alpha15 = alphas
758 .iter()
759 .cloned()
760 .filter(|&i| i != 0)
761 .min()
762 .unwrap_or(0);
763 let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas);
764
765 // pick the best one, encode the min/max values
766 let mut alpha_table = if error5 < error7 {
767 dest[0] = alpha05;
768 dest[1] = alpha15;
769 indices5
770 } else {
771 dest[0] = alpha07;
772 dest[1] = alpha17;
773 indices7
774 };
775
776 // encode the alphas
777 for byte in dest[2..8].iter_mut() {
778 *byte = alpha_table as u8;
779 alpha_table >>= 8;
780 }
781}
782
783/// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block
784fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) {
785 assert!(source.len() == 64 && dest.len() == 16);
786
787 // perform dxt color encoding
788 encode_dxt_colors(source, &mut dest[8..16], is_dxt1:false);
789
790 // DXT3 alpha compression is very simple, just round towards the nearest value
791
792 // index the alpha values into the 64bit alpha table
793 let mut alpha_table: u64 = 0u64;
794 for i: usize in 0..16 {
795 let alpha: u64 = u64::from(source[i * 4 + 3]);
796 let alpha: u64 = (alpha + 0x8) / 0x11;
797 alpha_table |= alpha << (i * 4);
798 }
799
800 // encode the alpha values
801 for byte: &mut u8 in &mut dest[0..8] {
802 *byte = alpha_table as u8;
803 alpha_table >>= 8;
804 }
805}
806
807/// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block
808fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) {
809 assert!(source.len() == 48 && dest.len() == 8);
810
811 // perform dxt color encoding
812 encode_dxt_colors(source, dest, is_dxt1:true);
813}
814
815/// Decode a row of DXT1 data to four rows of RGBA data.
816/// source.len() should be a multiple of 8, otherwise this panics.
817fn encode_dxt1_row(source: &[u8]) -> Vec<u8> {
818 assert!(source.len() % 48 == 0);
819 let block_count: usize = source.len() / 48;
820
821 let mut dest: Vec = vec![0u8; block_count * 8];
822 // contains the 16 decoded pixels per block
823 let mut decoded_block: [u8; 48] = [0u8; 48];
824
825 for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:8).enumerate() {
826 // copy the values from the decoded block to linewise RGB layout
827 for line: usize in 0..4 {
828 let offset: usize = (block_count * line + x) * 12;
829 decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]);
830 }
831
832 encode_dxt1_block(&decoded_block, dest:encoded_block);
833 }
834 dest
835}
836
837/// Decode a row of DXT3 data to four rows of RGBA data.
838/// source.len() should be a multiple of 16, otherwise this panics.
839fn encode_dxt3_row(source: &[u8]) -> Vec<u8> {
840 assert!(source.len() % 64 == 0);
841 let block_count: usize = source.len() / 64;
842
843 let mut dest: Vec = vec![0u8; block_count * 16];
844 // contains the 16 decoded pixels per block
845 let mut decoded_block: [u8; 64] = [0u8; 64];
846
847 for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:16).enumerate() {
848 // copy the values from the decoded block to linewise RGB layout
849 for line: usize in 0..4 {
850 let offset: usize = (block_count * line + x) * 16;
851 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
852 }
853
854 encode_dxt3_block(&decoded_block, dest:encoded_block);
855 }
856 dest
857}
858
859/// Decode a row of DXT5 data to four rows of RGBA data.
860/// source.len() should be a multiple of 16, otherwise this panics.
861fn encode_dxt5_row(source: &[u8]) -> Vec<u8> {
862 assert!(source.len() % 64 == 0);
863 let block_count: usize = source.len() / 64;
864
865 let mut dest: Vec = vec![0u8; block_count * 16];
866 // contains the 16 decoded pixels per block
867 let mut decoded_block: [u8; 64] = [0u8; 64];
868
869 for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:16).enumerate() {
870 // copy the values from the decoded block to linewise RGB layout
871 for line: usize in 0..4 {
872 let offset: usize = (block_count * line + x) * 16;
873 decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]);
874 }
875
876 encode_dxt5_block(&decoded_block, dest:encoded_block);
877 }
878 dest
879}
880