1 | use crate::{encoder::compression::*, tags::CompressionMethod}; |
2 | use std::io::{BufWriter, Error, ErrorKind, Write}; |
3 | |
4 | /// Compressor that uses the Packbits[^note] algorithm to compress bytes. |
5 | /// |
6 | /// [^note]: PackBits is often ineffective on continuous tone images, |
7 | /// including many grayscale images. In such cases, it is better |
8 | /// to leave the image uncompressed. |
9 | #[derive (Debug, Clone, Copy, PartialEq, Eq, Default)] |
10 | pub struct Packbits; |
11 | |
12 | impl Compression for Packbits { |
13 | const COMPRESSION_METHOD: CompressionMethod = CompressionMethod::PackBits; |
14 | |
15 | fn get_algorithm(&self) -> Compressor { |
16 | Compressor::Packbits(*self) |
17 | } |
18 | } |
19 | |
20 | impl CompressionAlgorithm for Packbits { |
21 | fn write_to<W: Write>(&mut self, writer: &mut W, bytes: &[u8]) -> Result<u64, io::Error> { |
22 | // Inspired by https://github.com/skirridsystems/packbits |
23 | |
24 | const MIN_REPT: u8 = 3; // Minimum run to compress between differ blocks |
25 | const MAX_BYTES: u8 = 128; // Maximum number of bytes that can be encoded in a header byte |
26 | |
27 | // Encoding for header byte based on number of bytes represented. |
28 | fn encode_diff(n: u8) -> u8 { |
29 | n - 1 |
30 | } |
31 | fn encode_rept(n: u8) -> u8 { |
32 | let var = 256 - (n - 1) as u16; |
33 | var as u8 |
34 | } |
35 | |
36 | fn write_u8<W: Write>(writer: &mut W, byte: u8) -> Result<u64, Error> { |
37 | writer.write(&[byte]).map(|byte_count| byte_count as u64) |
38 | } |
39 | |
40 | let mut bufwriter = BufWriter::new(writer); |
41 | let mut bytes_written = 0u64; // The number of bytes written into the writer |
42 | let mut offset: Option<u64> = None; // The index of the first byte written into the writer |
43 | |
44 | let mut src_index: usize = 0; // Index of the current byte |
45 | let mut src_count = bytes.len(); //The number of bytes remaining to be compressed |
46 | |
47 | let mut in_run = false; // Indication whether counting of similar bytes is performed |
48 | let mut run_index = 0u8; // Distance into pending bytes that a run starts |
49 | |
50 | let mut bytes_pending = 0u8; // Bytes looked at but not yet output |
51 | let mut pending_index = 0usize; // Index of the first pending byte |
52 | |
53 | let mut curr_byte: u8; // Byte currently being considered |
54 | let mut last_byte: u8; // Previous byte |
55 | |
56 | // Need at least one byte to compress |
57 | if src_count == 0 { |
58 | return Err(Error::new(ErrorKind::WriteZero, "write zero" )); |
59 | } |
60 | |
61 | // Prime compressor with first character. |
62 | last_byte = bytes[src_index]; |
63 | src_index += 1; |
64 | bytes_pending += 1; |
65 | |
66 | while src_count - 1 != 0 { |
67 | src_count -= 1; |
68 | curr_byte = bytes[src_index]; |
69 | src_index += 1; |
70 | bytes_pending += 1; |
71 | |
72 | if in_run { |
73 | if (curr_byte != last_byte) || (bytes_pending > MAX_BYTES) { |
74 | offset.get_or_insert(write_u8(&mut bufwriter, encode_rept(bytes_pending - 1))?); |
75 | write_u8(&mut bufwriter, last_byte)?; |
76 | bytes_written += 2; |
77 | |
78 | bytes_pending = 1; |
79 | pending_index = src_index - 1; |
80 | run_index = 0; |
81 | in_run = false; |
82 | } |
83 | } else if bytes_pending > MAX_BYTES { |
84 | // We have as much differing data as we can output in one chunk. |
85 | // Output MAX_BYTES leaving one byte. |
86 | offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(MAX_BYTES))?); |
87 | bufwriter.write_all(&bytes[pending_index..pending_index + MAX_BYTES as usize])?; |
88 | bytes_written += 1 + MAX_BYTES as u64; |
89 | |
90 | pending_index += MAX_BYTES as usize; |
91 | bytes_pending -= MAX_BYTES; |
92 | run_index = bytes_pending - 1; // A run could start here |
93 | } else if curr_byte == last_byte { |
94 | if (bytes_pending - run_index >= MIN_REPT) || (run_index == 0) { |
95 | // This is a worthwhile run |
96 | if run_index != 0 { |
97 | // Flush differing data out of input buffer |
98 | offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(run_index))?); |
99 | bufwriter |
100 | .write_all(&bytes[pending_index..pending_index + run_index as usize])?; |
101 | bytes_written += 1 + run_index as u64; |
102 | } |
103 | bytes_pending -= run_index; // Length of run |
104 | in_run = true; |
105 | } |
106 | } else { |
107 | run_index = bytes_pending - 1; // A run could start here |
108 | } |
109 | last_byte = curr_byte; |
110 | } |
111 | |
112 | // Output the remainder |
113 | if in_run { |
114 | bytes_written += 2; |
115 | offset.get_or_insert(write_u8(&mut bufwriter, encode_rept(bytes_pending))?); |
116 | write_u8(&mut bufwriter, last_byte)?; |
117 | } else { |
118 | bytes_written += 1 + bytes_pending as u64; |
119 | offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(bytes_pending))?); |
120 | bufwriter.write_all(&bytes[pending_index..pending_index + bytes_pending as usize])?; |
121 | } |
122 | |
123 | bufwriter.flush()?; |
124 | Ok(bytes_written) |
125 | } |
126 | } |
127 | |
128 | #[cfg (test)] |
129 | mod tests { |
130 | use super::*; |
131 | use crate::encoder::compression::tests::TEST_DATA; |
132 | use std::io::Cursor; |
133 | |
134 | #[test ] |
135 | fn test_packbits_single_byte() { |
136 | // compress single byte |
137 | const UNCOMPRESSED_DATA: [u8; 1] = [0x3F]; |
138 | const EXPECTED_COMPRESSED_DATA: [u8; 2] = [0x00, 0x3F]; |
139 | |
140 | let mut compressed_data = Vec::<u8>::new(); |
141 | let mut writer = Cursor::new(&mut compressed_data); |
142 | Packbits.write_to(&mut writer, &UNCOMPRESSED_DATA).unwrap(); |
143 | assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA); |
144 | } |
145 | |
146 | #[test ] |
147 | fn test_packbits_rept() { |
148 | // compress buffer with repetitive sequence |
149 | const UNCOMPRESSED_DATA: &[u8] = |
150 | b"This strrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrring hangs." ; |
151 | const EXPECTED_COMPRESSED_DATA: &[u8] = b" \x06This st \xD1r \x09ing hangs." ; |
152 | |
153 | let mut compressed_data = Vec::<u8>::new(); |
154 | let mut writer = Cursor::new(&mut compressed_data); |
155 | Packbits.write_to(&mut writer, UNCOMPRESSED_DATA).unwrap(); |
156 | assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA); |
157 | } |
158 | |
159 | #[test ] |
160 | fn test_packbits_large_rept_nonrept() { |
161 | // compress buffer with large repetitive and non-repetitive sequence |
162 | let mut data = b"This st" .to_vec(); |
163 | for _i in 0..158 { |
164 | data.push(b'r' ); |
165 | } |
166 | data.extend_from_slice(b"ing hangs." ); |
167 | for i in 0..158 { |
168 | data.push(i); |
169 | } |
170 | |
171 | const EXPECTED_COMPRESSED_DATA: [u8; 182] = [ |
172 | 0x06, 0x54, 0x68, 0x69, 0x73, 0x20, 0x73, 0x74, 0x81, 0x72, 0xE3, 0x72, 0x7F, 0x69, |
173 | 0x6E, 0x67, 0x20, 0x68, 0x61, 0x6E, 0x67, 0x73, 0x2E, 0x00, 0x01, 0x02, 0x03, 0x04, |
174 | 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, |
175 | 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, |
176 | 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, |
177 | 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, |
178 | 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, |
179 | 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, |
180 | 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, |
181 | 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, |
182 | 0x75, 0x27, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, |
183 | 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, |
184 | 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, |
185 | ]; |
186 | |
187 | let mut compressed_data = Vec::<u8>::new(); |
188 | let mut writer = Cursor::new(&mut compressed_data); |
189 | Packbits.write_to(&mut writer, data.as_slice()).unwrap(); |
190 | assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA); |
191 | } |
192 | |
193 | #[test ] |
194 | fn test_packbits() { |
195 | // compress teststring |
196 | const EXPECTED_COMPRESSED_DATA: &[u8] = |
197 | b" \x3CThis is a string for checking various compression algorithms." ; |
198 | |
199 | let mut compressed_data = Vec::<u8>::new(); |
200 | let mut writer = Cursor::new(&mut compressed_data); |
201 | Packbits.write_to(&mut writer, TEST_DATA).unwrap(); |
202 | assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA); |
203 | } |
204 | } |
205 | |