1 | //! Decoding of DXT (S3TC) compression |
2 | //! |
3 | //! DXT is an image format that supports lossy compression |
4 | //! |
5 | //! # Related Links |
6 | //! * <https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_compression_s3tc.txt> - Description of the DXT compression OpenGL extensions. |
7 | //! |
8 | //! Note: this module only implements bare DXT encoding/decoding, it does not parse formats that can contain DXT files like .dds |
9 | |
10 | use std::io::{self, Read, Seek, SeekFrom, Write}; |
11 | |
12 | use crate::color::ColorType; |
13 | use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind}; |
14 | use crate::image::{self, ImageDecoder, ImageDecoderRect, ImageReadBuffer, Progress}; |
15 | |
16 | /// What version of DXT compression are we using? |
17 | /// Note that DXT2 and DXT4 are left away as they're |
18 | /// just DXT3 and DXT5 with premultiplied alpha |
19 | #[derive (Clone, Copy, Debug, PartialEq, Eq)] |
20 | pub enum DxtVariant { |
21 | /// The DXT1 format. 48 bytes of RGB data in a 4x4 pixel square is |
22 | /// compressed into an 8 byte block of DXT1 data |
23 | DXT1, |
24 | /// The DXT3 format. 64 bytes of RGBA data in a 4x4 pixel square is |
25 | /// compressed into a 16 byte block of DXT3 data |
26 | DXT3, |
27 | /// The DXT5 format. 64 bytes of RGBA data in a 4x4 pixel square is |
28 | /// compressed into a 16 byte block of DXT5 data |
29 | DXT5, |
30 | } |
31 | |
32 | impl DxtVariant { |
33 | /// Returns the amount of bytes of raw image data |
34 | /// that is encoded in a single DXTn block |
35 | fn decoded_bytes_per_block(self) -> usize { |
36 | match self { |
37 | DxtVariant::DXT1 => 48, |
38 | DxtVariant::DXT3 | DxtVariant::DXT5 => 64, |
39 | } |
40 | } |
41 | |
42 | /// Returns the amount of bytes per block of encoded DXTn data |
43 | fn encoded_bytes_per_block(self) -> usize { |
44 | match self { |
45 | DxtVariant::DXT1 => 8, |
46 | DxtVariant::DXT3 | DxtVariant::DXT5 => 16, |
47 | } |
48 | } |
49 | |
50 | /// Returns the color type that is stored in this DXT variant |
51 | pub fn color_type(self) -> ColorType { |
52 | match self { |
53 | DxtVariant::DXT1 => ColorType::Rgb8, |
54 | DxtVariant::DXT3 | DxtVariant::DXT5 => ColorType::Rgba8, |
55 | } |
56 | } |
57 | } |
58 | |
59 | /// DXT decoder |
60 | pub struct DxtDecoder<R: Read> { |
61 | inner: R, |
62 | width_blocks: u32, |
63 | height_blocks: u32, |
64 | variant: DxtVariant, |
65 | row: u32, |
66 | } |
67 | |
68 | impl<R: Read> DxtDecoder<R> { |
69 | /// Create a new DXT decoder that decodes from the stream ```r```. |
70 | /// As DXT is often stored as raw buffers with the width/height |
71 | /// somewhere else the width and height of the image need |
72 | /// to be passed in ```width``` and ```height```, as well as the |
73 | /// DXT variant in ```variant```. |
74 | /// width and height are required to be powers of 2 and at least 4. |
75 | /// otherwise an error will be returned |
76 | pub fn new( |
77 | r: R, |
78 | width: u32, |
79 | height: u32, |
80 | variant: DxtVariant, |
81 | ) -> Result<DxtDecoder<R>, ImageError> { |
82 | if width % 4 != 0 || height % 4 != 0 { |
83 | // TODO: this is actually a bit of a weird case. We could return `DecodingError` but |
84 | // it's not really the format that is wrong However, the encoder should surely return |
85 | // `EncodingError` so it would be the logical choice for symmetry. |
86 | return Err(ImageError::Parameter(ParameterError::from_kind( |
87 | ParameterErrorKind::DimensionMismatch, |
88 | ))); |
89 | } |
90 | let width_blocks = width / 4; |
91 | let height_blocks = height / 4; |
92 | Ok(DxtDecoder { |
93 | inner: r, |
94 | width_blocks, |
95 | height_blocks, |
96 | variant, |
97 | row: 0, |
98 | }) |
99 | } |
100 | |
101 | fn read_scanline(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
102 | assert_eq!( |
103 | u64::try_from(buf.len()), |
104 | Ok( |
105 | #[allow (deprecated)] |
106 | self.scanline_bytes() |
107 | ) |
108 | ); |
109 | |
110 | let mut src = |
111 | vec![0u8; self.variant.encoded_bytes_per_block() * self.width_blocks as usize]; |
112 | self.inner.read_exact(&mut src)?; |
113 | match self.variant { |
114 | DxtVariant::DXT1 => decode_dxt1_row(&src, buf), |
115 | DxtVariant::DXT3 => decode_dxt3_row(&src, buf), |
116 | DxtVariant::DXT5 => decode_dxt5_row(&src, buf), |
117 | } |
118 | self.row += 1; |
119 | Ok(buf.len()) |
120 | } |
121 | } |
122 | |
123 | // Note that, due to the way that DXT compression works, a scanline is considered to consist out of |
124 | // 4 lines of pixels. |
125 | impl<'a, R: 'a + Read> ImageDecoder<'a> for DxtDecoder<R> { |
126 | type Reader = DxtReader<R>; |
127 | |
128 | fn dimensions(&self) -> (u32, u32) { |
129 | (self.width_blocks * 4, self.height_blocks * 4) |
130 | } |
131 | |
132 | fn color_type(&self) -> ColorType { |
133 | self.variant.color_type() |
134 | } |
135 | |
136 | fn scanline_bytes(&self) -> u64 { |
137 | self.variant.decoded_bytes_per_block() as u64 * u64::from(self.width_blocks) |
138 | } |
139 | |
140 | fn into_reader(self) -> ImageResult<Self::Reader> { |
141 | Ok(DxtReader { |
142 | buffer: ImageReadBuffer::new( |
143 | #[allow (deprecated)] |
144 | self.scanline_bytes(), |
145 | self.total_bytes(), |
146 | ), |
147 | decoder: self, |
148 | }) |
149 | } |
150 | |
151 | fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> { |
152 | assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes())); |
153 | |
154 | #[allow (deprecated)] |
155 | for chunk in buf.chunks_mut(self.scanline_bytes().max(1) as usize) { |
156 | self.read_scanline(chunk)?; |
157 | } |
158 | Ok(()) |
159 | } |
160 | } |
161 | |
162 | impl<'a, R: 'a + Read + Seek> ImageDecoderRect<'a> for DxtDecoder<R> { |
163 | fn read_rect_with_progress<F: Fn(Progress)>( |
164 | &mut self, |
165 | x: u32, |
166 | y: u32, |
167 | width: u32, |
168 | height: u32, |
169 | buf: &mut [u8], |
170 | progress_callback: F, |
171 | ) -> ImageResult<()> { |
172 | let encoded_scanline_bytes = |
173 | self.variant.encoded_bytes_per_block() as u64 * u64::from(self.width_blocks); |
174 | |
175 | let start = self.inner.stream_position()?; |
176 | image::load_rect( |
177 | x, |
178 | y, |
179 | width, |
180 | height, |
181 | buf, |
182 | progress_callback, |
183 | self, |
184 | |s, scanline| { |
185 | s.inner |
186 | .seek(SeekFrom::Start(start + scanline * encoded_scanline_bytes))?; |
187 | Ok(()) |
188 | }, |
189 | |s, buf| s.read_scanline(buf).map(|_| ()), |
190 | )?; |
191 | self.inner.seek(SeekFrom::Start(start))?; |
192 | Ok(()) |
193 | } |
194 | } |
195 | |
196 | /// DXT reader |
197 | pub struct DxtReader<R: Read> { |
198 | buffer: ImageReadBuffer, |
199 | decoder: DxtDecoder<R>, |
200 | } |
201 | |
202 | impl<R: Read> Read for DxtReader<R> { |
203 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
204 | let decoder: &mut DxtDecoder = &mut self.decoder; |
205 | self.buffer.read(buf, |buf: &mut [u8]| decoder.read_scanline(buf)) |
206 | } |
207 | } |
208 | |
209 | /// DXT encoder |
210 | pub struct DxtEncoder<W: Write> { |
211 | w: W, |
212 | } |
213 | |
214 | impl<W: Write> DxtEncoder<W> { |
215 | /// Create a new encoder that writes its output to ```w``` |
216 | pub fn new(w: W) -> DxtEncoder<W> { |
217 | DxtEncoder { w } |
218 | } |
219 | |
220 | /// Encodes the image data ```data``` |
221 | /// that has dimensions ```width``` and ```height``` |
222 | /// in ```DxtVariant``` ```variant``` |
223 | /// data is assumed to be in variant.color_type() |
224 | pub fn encode( |
225 | mut self, |
226 | data: &[u8], |
227 | width: u32, |
228 | height: u32, |
229 | variant: DxtVariant, |
230 | ) -> ImageResult<()> { |
231 | if width % 4 != 0 || height % 4 != 0 { |
232 | // TODO: this is not very idiomatic yet. Should return an EncodingError. |
233 | return Err(ImageError::Parameter(ParameterError::from_kind( |
234 | ParameterErrorKind::DimensionMismatch, |
235 | ))); |
236 | } |
237 | let width_blocks = width / 4; |
238 | let height_blocks = height / 4; |
239 | |
240 | let stride = variant.decoded_bytes_per_block(); |
241 | |
242 | assert!(data.len() >= width_blocks as usize * height_blocks as usize * stride); |
243 | |
244 | for chunk in data.chunks(width_blocks as usize * stride) { |
245 | let data = match variant { |
246 | DxtVariant::DXT1 => encode_dxt1_row(chunk), |
247 | DxtVariant::DXT3 => encode_dxt3_row(chunk), |
248 | DxtVariant::DXT5 => encode_dxt5_row(chunk), |
249 | }; |
250 | self.w.write_all(&data)?; |
251 | } |
252 | Ok(()) |
253 | } |
254 | } |
255 | |
256 | /** |
257 | * Actual encoding/decoding logic below. |
258 | */ |
259 | use std::mem::swap; |
260 | |
261 | type Rgb = [u8; 3]; |
262 | |
263 | /// decodes a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value into 8-bit RGB |
264 | /// mapping is done so min/max range values are preserved. So for 5-bit |
265 | /// values 0x00 -> 0x00 and 0x1F -> 0xFF |
266 | fn enc565_decode(value: u16) -> Rgb { |
267 | let red: u16 = (value >> 11) & 0x1F; |
268 | let green: u16 = (value >> 5) & 0x3F; |
269 | let blue: u16 = (value) & 0x1F; |
270 | [ |
271 | (red * 0xFF / 0x1F) as u8, |
272 | (green * 0xFF / 0x3F) as u8, |
273 | (blue * 0xFF / 0x1F) as u8, |
274 | ] |
275 | } |
276 | |
277 | /// encodes an 8-bit RGB value into a 5-bit R, 6-bit G, 5-bit B 16-bit packed color value |
278 | /// mapping preserves min/max values. It is guaranteed that i == encode(decode(i)) for all i |
279 | fn enc565_encode(rgb: Rgb) -> u16 { |
280 | let red: u16 = (u16::from(rgb[0]) * 0x1F + 0x7E) / 0xFF; |
281 | let green: u16 = (u16::from(rgb[1]) * 0x3F + 0x7E) / 0xFF; |
282 | let blue: u16 = (u16::from(rgb[2]) * 0x1F + 0x7E) / 0xFF; |
283 | (red << 11) | (green << 5) | blue |
284 | } |
285 | |
286 | /// utility function: squares a value |
287 | fn square(a: i32) -> i32 { |
288 | a * a |
289 | } |
290 | |
291 | /// returns the squared error between two RGB values |
292 | fn diff(a: Rgb, b: Rgb) -> i32 { |
293 | square(i32::from(a[0]) - i32::from(b[0])) |
294 | + square(i32::from(a[1]) - i32::from(b[1])) |
295 | + square(i32::from(a[2]) - i32::from(b[2])) |
296 | } |
297 | |
298 | /* |
299 | * Functions for decoding DXT compression |
300 | */ |
301 | |
302 | /// Constructs the DXT5 alpha lookup table from the two alpha entries |
303 | /// if alpha0 > alpha1, constructs a table of [a0, a1, 6 linearly interpolated values from a0 to a1] |
304 | /// if alpha0 <= alpha1, constructs a table of [a0, a1, 4 linearly interpolated values from a0 to a1, 0, 0xFF] |
305 | fn alpha_table_dxt5(alpha0: u8, alpha1: u8) -> [u8; 8] { |
306 | let mut table: [u8; 8] = [alpha0, alpha1, 0, 0, 0, 0, 0, 0xFF]; |
307 | if alpha0 > alpha1 { |
308 | for i: u16 in 2..8u16 { |
309 | table[i as usize] = |
310 | (((8 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 7) as u8; |
311 | } |
312 | } else { |
313 | for i: u16 in 2..6u16 { |
314 | table[i as usize] = |
315 | (((6 - i) * u16::from(alpha0) + (i - 1) * u16::from(alpha1)) / 5) as u8; |
316 | } |
317 | } |
318 | table |
319 | } |
320 | |
321 | /// decodes an 8-byte dxt color block into the RGB channels of a 16xRGB or 16xRGBA block. |
322 | /// source should have a length of 8, dest a length of 48 (RGB) or 64 (RGBA) |
323 | fn decode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) { |
324 | // sanity checks, also enable the compiler to elide all following bound checks |
325 | assert!(source.len() == 8 && (dest.len() == 48 || dest.len() == 64)); |
326 | // calculate pitch to store RGB values in dest (3 for RGB, 4 for RGBA) |
327 | let pitch = dest.len() / 16; |
328 | |
329 | // extract color data |
330 | let color0 = u16::from(source[0]) | (u16::from(source[1]) << 8); |
331 | let color1 = u16::from(source[2]) | (u16::from(source[3]) << 8); |
332 | let color_table = u32::from(source[4]) |
333 | | (u32::from(source[5]) << 8) |
334 | | (u32::from(source[6]) << 16) |
335 | | (u32::from(source[7]) << 24); |
336 | // let color_table = source[4..8].iter().rev().fold(0, |t, &b| (t << 8) | b as u32); |
337 | |
338 | // decode the colors to rgb format |
339 | let mut colors = [[0; 3]; 4]; |
340 | colors[0] = enc565_decode(color0); |
341 | colors[1] = enc565_decode(color1); |
342 | |
343 | // determine color interpolation method |
344 | if color0 > color1 || !is_dxt1 { |
345 | // linearly interpolate the other two color table entries |
346 | for i in 0..3 { |
347 | colors[2][i] = ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8; |
348 | colors[3][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8; |
349 | } |
350 | } else { |
351 | // linearly interpolate one other entry, keep the other at 0 |
352 | for i in 0..3 { |
353 | colors[2][i] = ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8; |
354 | } |
355 | } |
356 | |
357 | // serialize the result. Every color is determined by looking up |
358 | // two bits in color_table which identify which color to actually pick from the 4 possible colors |
359 | for i in 0..16 { |
360 | dest[i * pitch..i * pitch + 3] |
361 | .copy_from_slice(&colors[(color_table >> (i * 2)) as usize & 3]); |
362 | } |
363 | } |
364 | |
365 | /// Decodes a 16-byte bock of dxt5 data to a 16xRGBA block |
366 | fn decode_dxt5_block(source: &[u8], dest: &mut [u8]) { |
367 | assert!(source.len() == 16 && dest.len() == 64); |
368 | |
369 | // extract alpha index table (stored as little endian 64-bit value) |
370 | let alpha_table: u64 = source[2..8] |
371 | .iter() |
372 | .rev() |
373 | .fold(init:0, |t: u64, &b: u8| (t << 8) | u64::from(b)); |
374 | |
375 | // alhpa level decode |
376 | let alphas: [u8; 8] = alpha_table_dxt5(alpha0:source[0], alpha1:source[1]); |
377 | |
378 | // serialize alpha |
379 | for i: usize in 0..16 { |
380 | dest[i * 4 + 3] = alphas[(alpha_table >> (i * 3)) as usize & 7]; |
381 | } |
382 | |
383 | // handle colors |
384 | decode_dxt_colors(&source[8..16], dest, is_dxt1:false); |
385 | } |
386 | |
387 | /// Decodes a 16-byte bock of dxt3 data to a 16xRGBA block |
388 | fn decode_dxt3_block(source: &[u8], dest: &mut [u8]) { |
389 | assert!(source.len() == 16 && dest.len() == 64); |
390 | |
391 | // extract alpha index table (stored as little endian 64-bit value) |
392 | let alpha_table: u64 = source[0..8] |
393 | .iter() |
394 | .rev() |
395 | .fold(init:0, |t: u64, &b: u8| (t << 8) | u64::from(b)); |
396 | |
397 | // serialize alpha (stored as 4-bit values) |
398 | for i: usize in 0..16 { |
399 | dest[i * 4 + 3] = ((alpha_table >> (i * 4)) as u8 & 0xF) * 0x11; |
400 | } |
401 | |
402 | // handle colors |
403 | decode_dxt_colors(&source[8..16], dest, is_dxt1:false); |
404 | } |
405 | |
406 | /// Decodes a 8-byte bock of dxt5 data to a 16xRGB block |
407 | fn decode_dxt1_block(source: &[u8], dest: &mut [u8]) { |
408 | assert!(source.len() == 8 && dest.len() == 48); |
409 | decode_dxt_colors(source, dest, is_dxt1:true); |
410 | } |
411 | |
412 | /// Decode a row of DXT1 data to four rows of RGB data. |
413 | /// source.len() should be a multiple of 8, otherwise this panics. |
414 | fn decode_dxt1_row(source: &[u8], dest: &mut [u8]) { |
415 | assert!(source.len() % 8 == 0); |
416 | let block_count: usize = source.len() / 8; |
417 | assert!(dest.len() >= block_count * 48); |
418 | |
419 | // contains the 16 decoded pixels per block |
420 | let mut decoded_block: [u8; 48] = [0u8; 48]; |
421 | |
422 | for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:8).enumerate() { |
423 | decode_dxt1_block(source:encoded_block, &mut decoded_block); |
424 | |
425 | // copy the values from the decoded block to linewise RGB layout |
426 | for line: usize in 0..4 { |
427 | let offset: usize = (block_count * line + x) * 12; |
428 | dest[offset..offset + 12].copy_from_slice(&decoded_block[line * 12..(line + 1) * 12]); |
429 | } |
430 | } |
431 | } |
432 | |
433 | /// Decode a row of DXT3 data to four rows of RGBA data. |
434 | /// source.len() should be a multiple of 16, otherwise this panics. |
435 | fn decode_dxt3_row(source: &[u8], dest: &mut [u8]) { |
436 | assert!(source.len() % 16 == 0); |
437 | let block_count: usize = source.len() / 16; |
438 | assert!(dest.len() >= block_count * 64); |
439 | |
440 | // contains the 16 decoded pixels per block |
441 | let mut decoded_block: [u8; 64] = [0u8; 64]; |
442 | |
443 | for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:16).enumerate() { |
444 | decode_dxt3_block(source:encoded_block, &mut decoded_block); |
445 | |
446 | // copy the values from the decoded block to linewise RGB layout |
447 | for line: usize in 0..4 { |
448 | let offset: usize = (block_count * line + x) * 16; |
449 | dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]); |
450 | } |
451 | } |
452 | } |
453 | |
454 | /// Decode a row of DXT5 data to four rows of RGBA data. |
455 | /// source.len() should be a multiple of 16, otherwise this panics. |
456 | fn decode_dxt5_row(source: &[u8], dest: &mut [u8]) { |
457 | assert!(source.len() % 16 == 0); |
458 | let block_count: usize = source.len() / 16; |
459 | assert!(dest.len() >= block_count * 64); |
460 | |
461 | // contains the 16 decoded pixels per block |
462 | let mut decoded_block: [u8; 64] = [0u8; 64]; |
463 | |
464 | for (x: usize, encoded_block: &[u8]) in source.chunks(chunk_size:16).enumerate() { |
465 | decode_dxt5_block(source:encoded_block, &mut decoded_block); |
466 | |
467 | // copy the values from the decoded block to linewise RGB layout |
468 | for line: usize in 0..4 { |
469 | let offset: usize = (block_count * line + x) * 16; |
470 | dest[offset..offset + 16].copy_from_slice(&decoded_block[line * 16..(line + 1) * 16]); |
471 | } |
472 | } |
473 | } |
474 | |
475 | /* |
476 | * Functions for encoding DXT compression |
477 | */ |
478 | |
479 | /// Tries to perform the color encoding part of dxt compression |
480 | /// the approach taken is simple, it picks unique combinations |
481 | /// of the colors present in the block, and attempts to encode the |
482 | /// block with each, picking the encoding that yields the least |
483 | /// squared error out of all of them. |
484 | /// |
485 | /// This could probably be faster but is already reasonably fast |
486 | /// and a good reference impl to optimize others against. |
487 | /// |
488 | /// Another way to perform this analysis would be to perform a |
489 | /// singular value decomposition of the different colors, and |
490 | /// then pick 2 points on this line as the base colors. But |
491 | /// this is still rather unwieldy math and has issues |
492 | /// with the 3-linear-colors-and-0 case, it's also worse |
493 | /// at conserving the original colors. |
494 | /// |
495 | /// source: should be RGBAx16 or RGBx16 bytes of data, |
496 | /// dest 8 bytes of resulting encoded color data |
497 | fn encode_dxt_colors(source: &[u8], dest: &mut [u8], is_dxt1: bool) { |
498 | // sanity checks and determine stride when parsing the source data |
499 | assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8); |
500 | let stride = source.len() / 16; |
501 | |
502 | // reference colors array |
503 | let mut colors = [[0u8; 3]; 4]; |
504 | |
505 | // Put the colors we're going to be processing in an array with pure RGB layout |
506 | // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop. |
507 | let mut targets = [[0u8; 3]; 16]; |
508 | for (s, d) in source.chunks(stride).rev().zip(&mut targets) { |
509 | *d = [s[0], s[1], s[2]]; |
510 | } |
511 | |
512 | // roundtrip all colors through the r5g6b5 encoding |
513 | for rgb in &mut targets { |
514 | *rgb = enc565_decode(enc565_encode(*rgb)); |
515 | } |
516 | |
517 | // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this |
518 | let mut colorspace_ = [[0u8; 3]; 16]; |
519 | let mut colorspace_len = 0; |
520 | for color in &targets { |
521 | if !colorspace_[..colorspace_len].contains(color) { |
522 | colorspace_[colorspace_len] = *color; |
523 | colorspace_len += 1; |
524 | } |
525 | } |
526 | let mut colorspace = &colorspace_[..colorspace_len]; |
527 | |
528 | // in case of slight gradients it can happen that there's only one entry left in the color table. |
529 | // as the resulting banding can be quite bad if we would just left the block at the closest |
530 | // encodable color, we have a special path here that tries to emulate the wanted color |
531 | // using the linear interpolation between gradients |
532 | if colorspace.len() == 1 { |
533 | // the base color we got from colorspace reduction |
534 | let ref_rgb = colorspace[0]; |
535 | // the unreduced color in this block that's the furthest away from the actual block |
536 | let mut rgb = targets |
537 | .iter() |
538 | .cloned() |
539 | .max_by_key(|rgb| diff(*rgb, ref_rgb)) |
540 | .unwrap(); |
541 | // amplify differences by 2.5, which should push them to the next quantized value |
542 | // if possible without overshoot |
543 | for i in 0..3 { |
544 | rgb[i] = |
545 | ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8; |
546 | } |
547 | |
548 | // roundtrip it through quantization |
549 | let encoded = enc565_encode(rgb); |
550 | let rgb = enc565_decode(encoded); |
551 | |
552 | // in case this didn't land us a different color the best way to represent this field is |
553 | // as a single color block |
554 | if rgb == ref_rgb { |
555 | dest[0] = encoded as u8; |
556 | dest[1] = (encoded >> 8) as u8; |
557 | |
558 | for d in dest.iter_mut().take(8).skip(2) { |
559 | *d = 0; |
560 | } |
561 | return; |
562 | } |
563 | |
564 | // we did find a separate value: add it to the options so after one round of quantization |
565 | // we're done |
566 | colorspace_[1] = rgb; |
567 | colorspace = &colorspace_[..2]; |
568 | } |
569 | |
570 | // block quantization loop: we basically just try every possible combination, returning |
571 | // the combination with the least squared error |
572 | // stores the best candidate colors |
573 | let mut chosen_colors = [[0; 3]; 4]; |
574 | // did this index table use the [0,0,0] variant |
575 | let mut chosen_use_0 = false; |
576 | // error calculated for the last entry |
577 | let mut chosen_error = 0xFFFF_FFFFu32; |
578 | |
579 | // loop through unique permutations of the colorspace, where c1 != c2 |
580 | 'search: for (i, &c1) in colorspace.iter().enumerate() { |
581 | colors[0] = c1; |
582 | |
583 | for &c2 in &colorspace[0..i] { |
584 | colors[1] = c2; |
585 | |
586 | if is_dxt1 { |
587 | // what's inside here is ran at most 120 times. |
588 | for use_0 in 0..2 { |
589 | // and 240 times here. |
590 | |
591 | if use_0 != 0 { |
592 | // interpolate one color, set the other to 0 |
593 | for i in 0..3 { |
594 | colors[2][i] = |
595 | ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8; |
596 | } |
597 | colors[3] = [0, 0, 0]; |
598 | } else { |
599 | // interpolate to get 2 more colors |
600 | for i in 0..3 { |
601 | colors[2][i] = |
602 | ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) |
603 | as u8; |
604 | colors[3][i] = |
605 | ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) |
606 | as u8; |
607 | } |
608 | } |
609 | |
610 | // calculate the total error if we were to quantize the block with these color combinations |
611 | // both these loops have statically known iteration counts and are well vectorizable |
612 | // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per |
613 | // pixel. |
614 | let total_error = targets |
615 | .iter() |
616 | .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap()) |
617 | .sum(); |
618 | |
619 | // update the match if we found a better one |
620 | if total_error < chosen_error { |
621 | chosen_colors = colors; |
622 | chosen_use_0 = use_0 != 0; |
623 | chosen_error = total_error; |
624 | |
625 | // if we've got a perfect or at most 1 LSB off match, we're done |
626 | if total_error < 4 { |
627 | break 'search; |
628 | } |
629 | } |
630 | } |
631 | } else { |
632 | // what's inside here is ran at most 120 times. |
633 | |
634 | // interpolate to get 2 more colors |
635 | for i in 0..3 { |
636 | colors[2][i] = |
637 | ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8; |
638 | colors[3][i] = |
639 | ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8; |
640 | } |
641 | |
642 | // calculate the total error if we were to quantize the block with these color combinations |
643 | // both these loops have statically known iteration counts and are well vectorizable |
644 | // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per |
645 | // pixel. |
646 | let total_error = targets |
647 | .iter() |
648 | .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap()) |
649 | .sum(); |
650 | |
651 | // update the match if we found a better one |
652 | if total_error < chosen_error { |
653 | chosen_colors = colors; |
654 | chosen_error = total_error; |
655 | |
656 | // if we've got a perfect or at most 1 LSB off match, we're done |
657 | if total_error < 4 { |
658 | break 'search; |
659 | } |
660 | } |
661 | } |
662 | } |
663 | } |
664 | |
665 | // calculate the final indices |
666 | // note that targets is already in reverse pixel order, to make the index computation easy. |
667 | let mut chosen_indices = 0u32; |
668 | for t in &targets { |
669 | let (idx, _) = chosen_colors |
670 | .iter() |
671 | .enumerate() |
672 | .min_by_key(|&(_, c)| diff(*c, *t)) |
673 | .unwrap(); |
674 | chosen_indices = (chosen_indices << 2) | idx as u32; |
675 | } |
676 | |
677 | // encode the colors |
678 | let mut color0 = enc565_encode(chosen_colors[0]); |
679 | let mut color1 = enc565_encode(chosen_colors[1]); |
680 | |
681 | // determine encoding. Note that color0 == color1 is impossible at this point |
682 | if is_dxt1 { |
683 | if color0 > color1 { |
684 | if chosen_use_0 { |
685 | swap(&mut color0, &mut color1); |
686 | // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3. |
687 | let filter = (chosen_indices & 0xAAAA_AAAA) >> 1; |
688 | chosen_indices ^= filter ^ 0x5555_5555; |
689 | } |
690 | } else if !chosen_use_0 { |
691 | swap(&mut color0, &mut color1); |
692 | // Indexes are packed 2 bits wide, swap index 0/1 and 2/3. |
693 | chosen_indices ^= 0x5555_5555; |
694 | } |
695 | } |
696 | |
697 | // encode everything. |
698 | dest[0] = color0 as u8; |
699 | dest[1] = (color0 >> 8) as u8; |
700 | dest[2] = color1 as u8; |
701 | dest[3] = (color1 >> 8) as u8; |
702 | for i in 0..4 { |
703 | dest[i + 4] = (chosen_indices >> (i * 8)) as u8; |
704 | } |
705 | } |
706 | |
707 | /// Encodes a buffer of 16 alpha bytes into a dxt5 alpha index table, |
708 | /// where the alpha table they are indexed against is created by |
709 | /// calling alpha_table_dxt5(alpha0, alpha1) |
710 | /// returns the resulting error and alpha table |
711 | fn encode_dxt5_alpha(alpha0: u8, alpha1: u8, alphas: &[u8; 16]) -> (i32, u64) { |
712 | // create a table for the given alpha ranges |
713 | let table: [u8; 8] = alpha_table_dxt5(alpha0, alpha1); |
714 | let mut indices: u64 = 0u64; |
715 | let mut total_error: i32 = 0i32; |
716 | |
717 | // least error brute force search |
718 | for (i: usize, &a: u8) in alphas.iter().enumerate() { |
719 | let (index: usize, error: i32) = tableOption<(usize, i32)> |
720 | .iter() |
721 | .enumerate() |
722 | .map(|(i: usize, &e: u8)| (i, square(i32::from(e) - i32::from(a)))) |
723 | .min_by_key(|&(_, e: i32)| e) |
724 | .unwrap(); |
725 | total_error += error; |
726 | indices |= (index as u64) << (i * 3); |
727 | } |
728 | |
729 | (total_error, indices) |
730 | } |
731 | |
732 | /// Encodes a RGBAx16 sequence of bytes to a 16 bytes DXT5 block |
733 | fn encode_dxt5_block(source: &[u8], dest: &mut [u8]) { |
734 | assert!(source.len() == 64 && dest.len() == 16); |
735 | |
736 | // perform dxt color encoding |
737 | encode_dxt_colors(source, &mut dest[8..16], false); |
738 | |
739 | // copy out the alpha bytes |
740 | let mut alphas = [0; 16]; |
741 | for i in 0..16 { |
742 | alphas[i] = source[i * 4 + 3]; |
743 | } |
744 | |
745 | // try both alpha compression methods, see which has the least error. |
746 | let alpha07 = alphas.iter().cloned().min().unwrap(); |
747 | let alpha17 = alphas.iter().cloned().max().unwrap(); |
748 | let (error7, indices7) = encode_dxt5_alpha(alpha07, alpha17, &alphas); |
749 | |
750 | // if all alphas are 0 or 255 it doesn't particularly matter what we do here. |
751 | let alpha05 = alphas |
752 | .iter() |
753 | .cloned() |
754 | .filter(|&i| i != 255) |
755 | .max() |
756 | .unwrap_or(255); |
757 | let alpha15 = alphas |
758 | .iter() |
759 | .cloned() |
760 | .filter(|&i| i != 0) |
761 | .min() |
762 | .unwrap_or(0); |
763 | let (error5, indices5) = encode_dxt5_alpha(alpha05, alpha15, &alphas); |
764 | |
765 | // pick the best one, encode the min/max values |
766 | let mut alpha_table = if error5 < error7 { |
767 | dest[0] = alpha05; |
768 | dest[1] = alpha15; |
769 | indices5 |
770 | } else { |
771 | dest[0] = alpha07; |
772 | dest[1] = alpha17; |
773 | indices7 |
774 | }; |
775 | |
776 | // encode the alphas |
777 | for byte in dest[2..8].iter_mut() { |
778 | *byte = alpha_table as u8; |
779 | alpha_table >>= 8; |
780 | } |
781 | } |
782 | |
783 | /// Encodes a RGBAx16 sequence of bytes into a 16 bytes DXT3 block |
784 | fn encode_dxt3_block(source: &[u8], dest: &mut [u8]) { |
785 | assert!(source.len() == 64 && dest.len() == 16); |
786 | |
787 | // perform dxt color encoding |
788 | encode_dxt_colors(source, &mut dest[8..16], is_dxt1:false); |
789 | |
790 | // DXT3 alpha compression is very simple, just round towards the nearest value |
791 | |
792 | // index the alpha values into the 64bit alpha table |
793 | let mut alpha_table: u64 = 0u64; |
794 | for i: usize in 0..16 { |
795 | let alpha: u64 = u64::from(source[i * 4 + 3]); |
796 | let alpha: u64 = (alpha + 0x8) / 0x11; |
797 | alpha_table |= alpha << (i * 4); |
798 | } |
799 | |
800 | // encode the alpha values |
801 | for byte: &mut u8 in &mut dest[0..8] { |
802 | *byte = alpha_table as u8; |
803 | alpha_table >>= 8; |
804 | } |
805 | } |
806 | |
807 | /// Encodes a RGBx16 sequence of bytes into a 8 bytes DXT1 block |
808 | fn encode_dxt1_block(source: &[u8], dest: &mut [u8]) { |
809 | assert!(source.len() == 48 && dest.len() == 8); |
810 | |
811 | // perform dxt color encoding |
812 | encode_dxt_colors(source, dest, is_dxt1:true); |
813 | } |
814 | |
815 | /// Decode a row of DXT1 data to four rows of RGBA data. |
816 | /// source.len() should be a multiple of 8, otherwise this panics. |
817 | fn encode_dxt1_row(source: &[u8]) -> Vec<u8> { |
818 | assert!(source.len() % 48 == 0); |
819 | let block_count: usize = source.len() / 48; |
820 | |
821 | let mut dest: Vec = vec![0u8; block_count * 8]; |
822 | // contains the 16 decoded pixels per block |
823 | let mut decoded_block: [u8; 48] = [0u8; 48]; |
824 | |
825 | for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:8).enumerate() { |
826 | // copy the values from the decoded block to linewise RGB layout |
827 | for line: usize in 0..4 { |
828 | let offset: usize = (block_count * line + x) * 12; |
829 | decoded_block[line * 12..(line + 1) * 12].copy_from_slice(&source[offset..offset + 12]); |
830 | } |
831 | |
832 | encode_dxt1_block(&decoded_block, dest:encoded_block); |
833 | } |
834 | dest |
835 | } |
836 | |
837 | /// Decode a row of DXT3 data to four rows of RGBA data. |
838 | /// source.len() should be a multiple of 16, otherwise this panics. |
839 | fn encode_dxt3_row(source: &[u8]) -> Vec<u8> { |
840 | assert!(source.len() % 64 == 0); |
841 | let block_count: usize = source.len() / 64; |
842 | |
843 | let mut dest: Vec = vec![0u8; block_count * 16]; |
844 | // contains the 16 decoded pixels per block |
845 | let mut decoded_block: [u8; 64] = [0u8; 64]; |
846 | |
847 | for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:16).enumerate() { |
848 | // copy the values from the decoded block to linewise RGB layout |
849 | for line: usize in 0..4 { |
850 | let offset: usize = (block_count * line + x) * 16; |
851 | decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]); |
852 | } |
853 | |
854 | encode_dxt3_block(&decoded_block, dest:encoded_block); |
855 | } |
856 | dest |
857 | } |
858 | |
859 | /// Decode a row of DXT5 data to four rows of RGBA data. |
860 | /// source.len() should be a multiple of 16, otherwise this panics. |
861 | fn encode_dxt5_row(source: &[u8]) -> Vec<u8> { |
862 | assert!(source.len() % 64 == 0); |
863 | let block_count: usize = source.len() / 64; |
864 | |
865 | let mut dest: Vec = vec![0u8; block_count * 16]; |
866 | // contains the 16 decoded pixels per block |
867 | let mut decoded_block: [u8; 64] = [0u8; 64]; |
868 | |
869 | for (x: usize, encoded_block: &mut [u8]) in dest.chunks_mut(chunk_size:16).enumerate() { |
870 | // copy the values from the decoded block to linewise RGB layout |
871 | for line: usize in 0..4 { |
872 | let offset: usize = (block_count * line + x) * 16; |
873 | decoded_block[line * 16..(line + 1) * 16].copy_from_slice(&source[offset..offset + 16]); |
874 | } |
875 | |
876 | encode_dxt5_block(&decoded_block, dest:encoded_block); |
877 | } |
878 | dest |
879 | } |
880 | |