1//! A module for all encoding needs.
2use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
3use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
4
5use crate::alloc::{boxed::Box, vec::Vec};
6#[cfg(feature = "std")]
7use crate::error::StreamResult;
8#[cfg(feature = "std")]
9use std::io::{self, BufRead, Write};
10
11/// The state for encoding data with an LZW algorithm.
12///
13/// The same structure can be utilized with streams as well as your own buffers and driver logic.
14/// It may even be possible to mix them if you are sufficiently careful not to lose any written
15/// data in the process.
16///
17/// This is a sans-IO implementation, meaning that it only contains the state of the encoder and
18/// the caller will provide buffers for input and output data when calling the basic
19/// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*`
20/// methods for enoding with a particular style of common IO.
21///
22/// * [`encode`] for encoding once without any IO-loop.
23/// * [`into_async`] for encoding with the `futures` traits for asynchronous IO.
24/// * [`into_stream`] for encoding with the standard `io` traits.
25/// * [`into_vec`] for in-memory encoding.
26///
27/// [`encode_bytes`]: #method.encode_bytes
28/// [`encode`]: #method.encode
29/// [`into_async`]: #method.into_async
30/// [`into_stream`]: #method.into_stream
31/// [`into_vec`]: #method.into_vec
32pub struct Encoder {
33 /// Internally dispatch via a dynamic trait object. This did not have any significant
34 /// performance impact as we batch data internally and this pointer does not change after
35 /// creation!
36 state: Box<dyn Stateful + Send + 'static>,
37}
38
39/// A encoding stream sink.
40///
41/// See [`Encoder::into_stream`] on how to create this type.
42///
43/// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream
44#[cfg_attr(
45 not(feature = "std"),
46 deprecated = "This type is only useful with the `std` feature."
47)]
48#[cfg_attr(not(feature = "std"), allow(dead_code))]
49pub struct IntoStream<'d, W> {
50 encoder: &'d mut Encoder,
51 writer: W,
52 buffer: Option<StreamBuf<'d>>,
53 default_size: usize,
54}
55
56/// An async decoding sink.
57///
58/// See [`Encoder::into_async`] on how to create this type.
59///
60/// [`Encoder::into_async`]: struct.Encoder.html#method.into_async
61#[cfg(feature = "async")]
62pub struct IntoAsync<'d, W> {
63 encoder: &'d mut Encoder,
64 writer: W,
65 buffer: Option<StreamBuf<'d>>,
66 default_size: usize,
67}
68
69/// A encoding sink into a vector.
70///
71/// See [`Encoder::into_vec`] on how to create this type.
72///
73/// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec
74pub struct IntoVec<'d> {
75 encoder: &'d mut Encoder,
76 vector: &'d mut Vec<u8>,
77}
78
79trait Stateful {
80 fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
81 fn mark_ended(&mut self) -> bool;
82 /// Reset the state tracking if end code has been written.
83 fn restart(&mut self);
84 /// Reset the encoder to the beginning, dropping all buffers etc.
85 fn reset(&mut self);
86}
87
88struct EncodeState<B: Buffer> {
89 /// The configured minimal code size.
90 min_size: u8,
91 /// The current encoding symbol tree.
92 tree: Tree,
93 /// If we have pushed the end code.
94 has_ended: bool,
95 /// If tiff then bumps are a single code sooner.
96 is_tiff: bool,
97 /// The code corresponding to the currently read characters.
98 current_code: Code,
99 /// The clear code for resetting the dictionary.
100 clear_code: Code,
101 /// The bit buffer for encoding.
102 buffer: B,
103}
104
105struct MsbBuffer {
106 /// The current code length.
107 code_size: u8,
108 /// The buffer bits.
109 buffer: u64,
110 /// The number of valid buffer bits.
111 bits_in_buffer: u8,
112}
113
114struct LsbBuffer {
115 /// The current code length.
116 code_size: u8,
117 /// The buffer bits.
118 buffer: u64,
119 /// The number of valid buffer bits.
120 bits_in_buffer: u8,
121}
122
123trait Buffer {
124 fn new(size: u8) -> Self;
125 /// Reset the code size in the buffer.
126 fn reset(&mut self, min_size: u8);
127 /// Apply effects of a Clear Code.
128 fn clear(&mut self, min_size: u8);
129 /// Insert a code into the buffer.
130 fn buffer_code(&mut self, code: Code);
131 /// Push bytes if the buffer space is getting small.
132 fn push_out(&mut self, out: &mut &mut [u8]) -> bool;
133 /// Flush all full bytes, returning if at least one more byte remains.
134 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool;
135 /// Pad the buffer to a full byte.
136 fn buffer_pad(&mut self);
137 /// Increase the maximum code size.
138 fn bump_code_size(&mut self);
139 /// Return the maximum code with the current code size.
140 fn max_code(&self) -> Code;
141 /// Return the current code size in bits.
142 fn code_size(&self) -> u8;
143}
144
145/// One tree node for at most each code.
146/// To avoid using too much memory we keep nodes with few successors in optimized form. This form
147/// doesn't offer lookup by indexing but instead does a linear search.
148#[derive(Default)]
149struct Tree {
150 simples: Vec<Simple>,
151 complex: Vec<Full>,
152 keys: Vec<CompressedKey>,
153}
154
155#[derive(Clone, Copy)]
156enum FullKey {
157 NoSuccessor,
158 Simple(u16),
159 Full(u16),
160}
161
162#[derive(Clone, Copy)]
163struct CompressedKey(u16);
164
165const SHORT: usize = 16;
166
167#[derive(Clone, Copy)]
168struct Simple {
169 codes: [Code; SHORT],
170 chars: [u8; SHORT],
171 count: u8,
172}
173
174#[derive(Clone, Copy)]
175struct Full {
176 char_continuation: [Code; 256],
177}
178
179impl Encoder {
180 /// Create a new encoder with the specified bit order and symbol size.
181 ///
182 /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
183 /// original specification. In particular you will need to specify an `Lsb` bit oder to encode
184 /// the data portion of a compressed `gif` image.
185 ///
186 /// # Panics
187 ///
188 /// The `size` needs to be in the interval `2..=12`.
189 pub fn new(order: BitOrder, size: u8) -> Self {
190 type Boxed = Box<dyn Stateful + Send + 'static>;
191 super::assert_encode_size(size);
192 let state = match order {
193 BitOrder::Lsb => Box::new(EncodeState::<LsbBuffer>::new(size)) as Boxed,
194 BitOrder::Msb => Box::new(EncodeState::<MsbBuffer>::new(size)) as Boxed,
195 };
196
197 Encoder { state }
198 }
199
200 /// Create a TIFF compatible encoder with the specified bit order and symbol size.
201 ///
202 /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
203 /// TIFF specification, which is a misinterpretation of the original algorithm for increasing
204 /// the code size. It switches one symbol sooner.
205 ///
206 /// # Panics
207 ///
208 /// The `size` needs to be in the interval `2..=12`.
209 pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
210 type Boxed = Box<dyn Stateful + Send + 'static>;
211 super::assert_encode_size(size);
212 let state = match order {
213 BitOrder::Lsb => {
214 let mut state = Box::new(EncodeState::<LsbBuffer>::new(size));
215 state.is_tiff = true;
216 state as Boxed
217 }
218 BitOrder::Msb => {
219 let mut state = Box::new(EncodeState::<MsbBuffer>::new(size));
220 state.is_tiff = true;
221 state as Boxed
222 }
223 };
224
225 Encoder { state }
226 }
227
228 /// Encode some bytes from `inp` into `out`.
229 ///
230 /// See [`into_stream`] for high-level functions (this interface is only available with the
231 /// `std` feature) and [`finish`] for marking the input data as complete.
232 ///
233 /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and
234 /// all following ones will _not_ be consumed and the `status` of the result will signal an
235 /// error. The result will also indicate that all bytes up to but not including the offending
236 /// byte have been consumed. You may try again with a fixed byte.
237 ///
238 /// [`into_stream`]: #method.into_stream
239 /// [`finish`]: #method.finish
240 pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
241 self.state.advance(inp, out)
242 }
243
244 /// Encode a single chunk of data.
245 ///
246 /// This method will add an end marker to the encoded chunk.
247 ///
248 /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize
249 /// buffer size, to supply an existing vector, to control whether an end marker is required, or
250 /// to preserve partial data in the case of a decoding error.
251 ///
252 /// [`into_vec`]: #into_vec
253 ///
254 /// # Example
255 ///
256 /// ```
257 /// use weezl::{BitOrder, encode::Encoder};
258 ///
259 /// let data = b"Hello, world";
260 /// let encoded = Encoder::new(BitOrder::Msb, 9)
261 /// .encode(data)
262 /// .expect("All bytes valid for code size");
263 /// ```
264 pub fn encode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
265 let mut output = Vec::new();
266 self.into_vec(&mut output).encode_all(data).status?;
267 Ok(output)
268 }
269
270 /// Construct a encoder into a writer.
271 #[cfg(feature = "std")]
272 pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
273 IntoStream {
274 encoder: self,
275 writer,
276 buffer: None,
277 default_size: STREAM_BUF_SIZE,
278 }
279 }
280
281 /// Construct a encoder into an async writer.
282 #[cfg(feature = "async")]
283 pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
284 IntoAsync {
285 encoder: self,
286 writer,
287 buffer: None,
288 default_size: STREAM_BUF_SIZE,
289 }
290 }
291
292 /// Construct an encoder into a vector.
293 ///
294 /// All encoded data is appended and the vector is __not__ cleared.
295 ///
296 /// Compared to `into_stream` this interface allows a high-level access to encoding without
297 /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the
298 /// special target exposes.
299 pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
300 IntoVec {
301 encoder: self,
302 vector: vec,
303 }
304 }
305
306 /// Mark the encoding as in the process of finishing.
307 ///
308 /// The next following call to `encode_bytes` which is able to consume the complete input will
309 /// also try to emit an end code. It's not recommended, but also not unsound, to use different
310 /// byte slices in different calls from this point forward and thus to 'delay' the actual end
311 /// of the data stream. The behaviour after the end marker has been written is unspecified but
312 /// sound.
313 pub fn finish(&mut self) {
314 self.state.mark_ended();
315 }
316
317 /// Undo marking this data stream as ending.
318 /// FIXME: clarify how this interacts with padding introduced after end code.
319 #[allow(dead_code)]
320 pub(crate) fn restart(&mut self) {
321 self.state.restart()
322 }
323
324 /// Reset all internal state.
325 ///
326 /// This produce an encoder as if just constructed with `new` but taking slightly less work. In
327 /// particular it will not deallocate any internal allocations. It will also avoid some
328 /// duplicate setup work.
329 pub fn reset(&mut self) {
330 self.state.reset()
331 }
332}
333
334#[cfg(feature = "std")]
335impl<'d, W: Write> IntoStream<'d, W> {
336 /// Encode data from a reader.
337 ///
338 /// This will drain the supplied reader. It will not encode an end marker after all data has
339 /// been processed.
340 pub fn encode(&mut self, read: impl BufRead) -> StreamResult {
341 self.encode_part(read, false)
342 }
343
344 /// Encode data from a reader and an end marker.
345 pub fn encode_all(mut self, read: impl BufRead) -> StreamResult {
346 self.encode_part(read, true)
347 }
348
349 /// Set the size of the intermediate encode buffer.
350 ///
351 /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is
352 /// available and any encoding method is called. No buffer is allocated if `set_buffer` has
353 /// been called. The buffer is reused.
354 ///
355 /// # Panics
356 /// This method panics if `size` is `0`.
357 pub fn set_buffer_size(&mut self, size: usize) {
358 assert_ne!(size, 0, "Attempted to set empty buffer");
359 self.default_size = size;
360 }
361
362 /// Use a particular buffer as an intermediate encode buffer.
363 ///
364 /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
365 /// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant
366 /// for efficient encoding as there is additional overhead from `write` calls each time the
367 /// buffer has been filled.
368 ///
369 /// # Panics
370 /// This method panics if the `buffer` is empty.
371 pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
372 assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
373 self.buffer = Some(StreamBuf::Borrowed(buffer));
374 }
375
376 fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult {
377 let IntoStream {
378 encoder,
379 writer,
380 buffer,
381 default_size,
382 } = self;
383 enum Progress {
384 Ok,
385 Done,
386 }
387
388 let mut bytes_read = 0;
389 let mut bytes_written = 0;
390
391 let read_bytes = &mut bytes_read;
392 let write_bytes = &mut bytes_written;
393
394 let outbuf: &mut [u8] =
395 match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
396 StreamBuf::Borrowed(slice) => &mut *slice,
397 StreamBuf::Owned(vec) => &mut *vec,
398 };
399 assert!(!outbuf.is_empty());
400
401 let once = move || {
402 let data = read.fill_buf()?;
403
404 if data.is_empty() {
405 if finish {
406 encoder.finish();
407 } else {
408 return Ok(Progress::Done);
409 }
410 }
411
412 let result = encoder.encode_bytes(data, &mut outbuf[..]);
413 *read_bytes += result.consumed_in;
414 *write_bytes += result.consumed_out;
415 read.consume(result.consumed_in);
416
417 let done = result.status.map_err(|err| {
418 io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
419 })?;
420
421 if let LzwStatus::Done = done {
422 writer.write_all(&outbuf[..result.consumed_out])?;
423 return Ok(Progress::Done);
424 }
425
426 if let LzwStatus::NoProgress = done {
427 return Err(io::Error::new(
428 io::ErrorKind::UnexpectedEof,
429 "No more data but no end marker detected",
430 ));
431 }
432
433 writer.write_all(&outbuf[..result.consumed_out])?;
434 Ok(Progress::Ok)
435 };
436
437 let status = core::iter::repeat_with(once)
438 // scan+fuse can be replaced with map_while
439 .scan((), |(), result| match result {
440 Ok(Progress::Ok) => Some(Ok(())),
441 Err(err) => Some(Err(err)),
442 Ok(Progress::Done) => None,
443 })
444 .fuse()
445 .collect();
446
447 StreamResult {
448 bytes_read,
449 bytes_written,
450 status,
451 }
452 }
453}
454
455impl IntoVec<'_> {
456 /// Encode data from a slice.
457 pub fn encode(&mut self, read: &[u8]) -> VectorResult {
458 self.encode_part(read, false)
459 }
460
461 /// Decode data from a reader, adding an end marker.
462 pub fn encode_all(mut self, read: &[u8]) -> VectorResult {
463 self.encode_part(read, true)
464 }
465
466 fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) {
467 const CHUNK_SIZE: usize = 1 << 12;
468 let decoder = &mut self.encoder;
469 let length = self.vector.len();
470
471 // Use the vector to do overflow checks and w/e.
472 self.vector.reserve(CHUNK_SIZE);
473 // FIXME: encoding into uninit buffer?
474 self.vector.resize(length + CHUNK_SIZE, 0u8);
475
476 (&mut self.vector[length..], decoder)
477 }
478
479 fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult {
480 let mut result = VectorResult {
481 consumed_in: 0,
482 consumed_out: 0,
483 status: Ok(LzwStatus::Ok),
484 };
485
486 enum Progress {
487 Ok,
488 Done,
489 }
490
491 // Converting to mutable refs to move into the `once` closure.
492 let read_bytes = &mut result.consumed_in;
493 let write_bytes = &mut result.consumed_out;
494 let mut data = part;
495
496 // A 64 MB buffer is quite large but should get alloc_zeroed.
497 // Note that the decoded size can be up to quadratic in code block.
498 let once = move || {
499 // Grab a new output buffer.
500 let (outbuf, encoder) = self.grab_buffer();
501
502 if finish {
503 encoder.finish();
504 }
505
506 // Decode as much of the buffer as fits.
507 let result = encoder.encode_bytes(data, &mut outbuf[..]);
508 // Do the bookkeeping and consume the buffer.
509 *read_bytes += result.consumed_in;
510 *write_bytes += result.consumed_out;
511 data = &data[result.consumed_in..];
512
513 let unfilled = outbuf.len() - result.consumed_out;
514 let filled = self.vector.len() - unfilled;
515 self.vector.truncate(filled);
516
517 // Handle the status in the result.
518 let done = result.status?;
519 if let LzwStatus::Done = done {
520 Ok(Progress::Done)
521 } else {
522 Ok(Progress::Ok)
523 }
524 };
525
526 // Decode chunks of input data until we're done.
527 let status: Result<(), _> = core::iter::repeat_with(once)
528 // scan+fuse can be replaced with map_while
529 .scan((), |(), result| match result {
530 Ok(Progress::Ok) => Some(Ok(())),
531 Err(err) => Some(Err(err)),
532 Ok(Progress::Done) => None,
533 })
534 .fuse()
535 .collect();
536
537 if let Err(err) = status {
538 result.status = Err(err);
539 }
540
541 result
542 }
543}
544
545// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
546// trip over the usage of await, which is a reserved keyword in that edition/version. It only
547// contains an impl block.
548#[cfg(feature = "async")]
549#[path = "encode_into_async.rs"]
550mod impl_encode_into_async;
551
552impl<B: Buffer> EncodeState<B> {
553 fn new(min_size: u8) -> Self {
554 let clear_code: u16 = 1 << min_size;
555 let mut tree: Tree = Tree::default();
556 tree.init(min_size);
557 let mut state: EncodeState = EncodeState {
558 min_size,
559 tree,
560 has_ended: false,
561 is_tiff: false,
562 current_code: clear_code,
563 clear_code,
564 buffer: B::new(min_size),
565 };
566 state.buffer_code(clear_code);
567 state
568 }
569}
570
571impl<B: Buffer> Stateful for EncodeState<B> {
572 fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
573 let c_in = inp.len();
574 let c_out = out.len();
575 let mut status = Ok(LzwStatus::Ok);
576
577 'encoding: loop {
578 if self.push_out(&mut out) {
579 break;
580 }
581
582 if inp.is_empty() && self.has_ended {
583 let end = self.end_code();
584 if self.current_code != end {
585 if self.current_code != self.clear_code {
586 self.buffer_code(self.current_code);
587
588 // When reading this code, the decoder will add an extra entry to its table
589 // before reading th end code. Thusly, it may increase its code size based
590 // on this additional entry.
591 if self.tree.keys.len() + usize::from(self.is_tiff)
592 > usize::from(self.buffer.max_code())
593 && self.buffer.code_size() < MAX_CODESIZE
594 {
595 self.buffer.bump_code_size();
596 }
597 }
598 self.buffer_code(end);
599 self.current_code = end;
600 self.buffer_pad();
601 }
602
603 break;
604 }
605
606 let mut next_code = None;
607 let mut bytes = inp.iter();
608 while let Some(&byte) = bytes.next() {
609 if self.min_size < 8 && byte >= 1 << self.min_size {
610 status = Err(LzwError::InvalidCode);
611 break 'encoding;
612 }
613
614 inp = bytes.as_slice();
615 match self.tree.iterate(self.current_code, byte) {
616 Ok(code) => self.current_code = code,
617 Err(_) => {
618 next_code = Some(self.current_code);
619
620 self.current_code = u16::from(byte);
621 break;
622 }
623 }
624 }
625
626 match next_code {
627 // No more bytes, no code produced.
628 None => break,
629 Some(code) => {
630 self.buffer_code(code);
631
632 if self.tree.keys.len() + usize::from(self.is_tiff)
633 > usize::from(self.buffer.max_code()) + 1
634 && self.buffer.code_size() < MAX_CODESIZE
635 {
636 self.buffer.bump_code_size();
637 }
638
639 if self.tree.keys.len() > MAX_ENTRIES {
640 self.buffer_code(self.clear_code);
641 self.tree.reset(self.min_size);
642 self.buffer.clear(self.min_size);
643 }
644 }
645 }
646 }
647
648 if inp.is_empty() && self.current_code == self.end_code() {
649 if !self.flush_out(&mut out) {
650 status = Ok(LzwStatus::Done);
651 }
652 }
653
654 BufferResult {
655 consumed_in: c_in - inp.len(),
656 consumed_out: c_out - out.len(),
657 status,
658 }
659 }
660
661 fn mark_ended(&mut self) -> bool {
662 core::mem::replace(&mut self.has_ended, true)
663 }
664
665 fn restart(&mut self) {
666 self.has_ended = false;
667 }
668
669 fn reset(&mut self) {
670 self.restart();
671 self.current_code = self.clear_code;
672 self.tree.reset(self.min_size);
673 self.buffer.reset(self.min_size);
674 self.buffer_code(self.clear_code);
675 }
676}
677
678impl<B: Buffer> EncodeState<B> {
679 fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
680 self.buffer.push_out(out)
681 }
682
683 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
684 self.buffer.flush_out(out)
685 }
686
687 fn end_code(&self) -> Code {
688 self.clear_code + 1
689 }
690
691 fn buffer_pad(&mut self) {
692 self.buffer.buffer_pad();
693 }
694
695 fn buffer_code(&mut self, code: Code) {
696 self.buffer.buffer_code(code);
697 }
698}
699
700impl Buffer for MsbBuffer {
701 fn new(min_size: u8) -> Self {
702 MsbBuffer {
703 code_size: min_size + 1,
704 buffer: 0,
705 bits_in_buffer: 0,
706 }
707 }
708
709 fn reset(&mut self, min_size: u8) {
710 self.code_size = min_size + 1;
711 self.buffer = 0;
712 self.bits_in_buffer = 0;
713 }
714
715 fn clear(&mut self, min_size: u8) {
716 self.code_size = min_size + 1;
717 }
718
719 fn buffer_code(&mut self, code: Code) {
720 let shift = 64 - self.bits_in_buffer - self.code_size;
721 self.buffer |= u64::from(code) << shift;
722 self.bits_in_buffer += self.code_size;
723 }
724
725 fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
726 if self.bits_in_buffer + 2 * self.code_size < 64 {
727 return false;
728 }
729
730 self.flush_out(out)
731 }
732
733 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
734 let want = usize::from(self.bits_in_buffer / 8);
735 let count = want.min((*out).len());
736 let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
737 *out = tail;
738
739 for b in bytes {
740 *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8;
741 self.buffer <<= 8;
742 self.bits_in_buffer -= 8;
743 }
744
745 count < want
746 }
747
748 fn buffer_pad(&mut self) {
749 let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
750 self.bits_in_buffer += to_byte;
751 }
752
753 fn bump_code_size(&mut self) {
754 self.code_size += 1;
755 }
756
757 fn max_code(&self) -> Code {
758 (1 << self.code_size) - 1
759 }
760
761 fn code_size(&self) -> u8 {
762 self.code_size
763 }
764}
765
766impl Buffer for LsbBuffer {
767 fn new(min_size: u8) -> Self {
768 LsbBuffer {
769 code_size: min_size + 1,
770 buffer: 0,
771 bits_in_buffer: 0,
772 }
773 }
774
775 fn reset(&mut self, min_size: u8) {
776 self.code_size = min_size + 1;
777 self.buffer = 0;
778 self.bits_in_buffer = 0;
779 }
780
781 fn clear(&mut self, min_size: u8) {
782 self.code_size = min_size + 1;
783 }
784
785 fn buffer_code(&mut self, code: Code) {
786 self.buffer |= u64::from(code) << self.bits_in_buffer;
787 self.bits_in_buffer += self.code_size;
788 }
789
790 fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
791 if self.bits_in_buffer + 2 * self.code_size < 64 {
792 return false;
793 }
794
795 self.flush_out(out)
796 }
797
798 fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
799 let want = usize::from(self.bits_in_buffer / 8);
800 let count = want.min((*out).len());
801 let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
802 *out = tail;
803
804 for b in bytes {
805 *b = (self.buffer & 0x0000_0000_0000_00ff) as u8;
806 self.buffer >>= 8;
807 self.bits_in_buffer -= 8;
808 }
809
810 count < want
811 }
812
813 fn buffer_pad(&mut self) {
814 let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
815 self.bits_in_buffer += to_byte;
816 }
817
818 fn bump_code_size(&mut self) {
819 self.code_size += 1;
820 }
821
822 fn max_code(&self) -> Code {
823 (1 << self.code_size) - 1
824 }
825
826 fn code_size(&self) -> u8 {
827 self.code_size
828 }
829}
830
831impl Tree {
832 fn init(&mut self, min_size: u8) {
833 // We need a way to represent the state of a currently empty buffer. We use the clear code
834 // for this, thus create one complex mapping that leads to the one-char base codes.
835 self.keys
836 .resize((1 << min_size) + 2, FullKey::NoSuccessor.into());
837 self.complex.push(Full {
838 char_continuation: [0; 256],
839 });
840 let map_of_begin = self.complex.last_mut().unwrap();
841 for ch in 0u16..256 {
842 map_of_begin.char_continuation[usize::from(ch)] = ch;
843 }
844 self.keys[1 << min_size] = FullKey::Full(0).into();
845 }
846
847 fn reset(&mut self, min_size: u8) {
848 self.simples.clear();
849 self.keys.truncate((1 << min_size) + 2);
850 // Keep entry for clear code.
851 self.complex.truncate(1);
852 // The first complex is not changed..
853 for k in self.keys[..(1 << min_size) + 2].iter_mut() {
854 *k = FullKey::NoSuccessor.into();
855 }
856 self.keys[1 << min_size] = FullKey::Full(0).into();
857 }
858
859 fn at_key(&self, code: Code, ch: u8) -> Option<Code> {
860 let key = self.keys[usize::from(code)];
861 match FullKey::from(key) {
862 FullKey::NoSuccessor => None,
863 FullKey::Simple(idx) => {
864 let nexts = &self.simples[usize::from(idx)];
865 let successors = nexts
866 .codes
867 .iter()
868 .zip(nexts.chars.iter())
869 .take(usize::from(nexts.count));
870 for (&scode, &sch) in successors {
871 if sch == ch {
872 return Some(scode);
873 }
874 }
875
876 None
877 }
878 FullKey::Full(idx) => {
879 let full = &self.complex[usize::from(idx)];
880 let precode = full.char_continuation[usize::from(ch)];
881 if usize::from(precode) < MAX_ENTRIES {
882 Some(precode)
883 } else {
884 None
885 }
886 }
887 }
888 }
889
890 /// Iterate to the next char.
891 /// Return Ok when it was already in the tree or creates a new entry for it and returns Err.
892 fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> {
893 if let Some(next) = self.at_key(code, ch) {
894 Ok(next)
895 } else {
896 Err(self.append(code, ch))
897 }
898 }
899
900 fn append(&mut self, code: Code, ch: u8) -> Code {
901 let next: Code = self.keys.len() as u16;
902 let key = self.keys[usize::from(code)];
903 // TODO: with debug assertions, check for non-existence
904 match FullKey::from(key) {
905 FullKey::NoSuccessor => {
906 let new_key = FullKey::Simple(self.simples.len() as u16);
907 self.simples.push(Simple::default());
908 let simples = self.simples.last_mut().unwrap();
909 simples.codes[0] = next;
910 simples.chars[0] = ch;
911 simples.count = 1;
912 self.keys[usize::from(code)] = new_key.into();
913 }
914 FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => {
915 let nexts = &mut self.simples[usize::from(idx)];
916 let nidx = usize::from(nexts.count);
917 nexts.chars[nidx] = ch;
918 nexts.codes[nidx] = next;
919 nexts.count += 1;
920 }
921 FullKey::Simple(idx) => {
922 let new_key = FullKey::Full(self.complex.len() as u16);
923 let simples = &self.simples[usize::from(idx)];
924 self.complex.push(Full {
925 char_continuation: [Code::max_value(); 256],
926 });
927 let full = self.complex.last_mut().unwrap();
928 for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) {
929 full.char_continuation[usize::from(pch)] = pcont;
930 }
931 self.keys[usize::from(code)] = new_key.into();
932 }
933 FullKey::Full(idx) => {
934 let full = &mut self.complex[usize::from(idx)];
935 full.char_continuation[usize::from(ch)] = next;
936 }
937 }
938 self.keys.push(FullKey::NoSuccessor.into());
939 next
940 }
941}
942
943impl Default for FullKey {
944 fn default() -> Self {
945 FullKey::NoSuccessor
946 }
947}
948
949impl Default for Simple {
950 fn default() -> Self {
951 Simple {
952 codes: [0; SHORT],
953 chars: [0; SHORT],
954 count: 0,
955 }
956 }
957}
958
959impl From<CompressedKey> for FullKey {
960 fn from(CompressedKey(key: u16): CompressedKey) -> Self {
961 match (key >> MAX_CODESIZE) & 0xf {
962 0 => FullKey::Full(key & 0xfff),
963 1 => FullKey::Simple(key & 0xfff),
964 _ => FullKey::NoSuccessor,
965 }
966 }
967}
968
969impl From<FullKey> for CompressedKey {
970 fn from(full: FullKey) -> Self {
971 CompressedKey(match full {
972 FullKey::NoSuccessor => 0x2000,
973 FullKey::Simple(code: u16) => 0x1000 | code,
974 FullKey::Full(code: u16) => code,
975 })
976 }
977}
978
979#[cfg(test)]
980mod tests {
981 use super::{BitOrder, Encoder, LzwError, LzwStatus};
982 use crate::alloc::vec::Vec;
983 use crate::decode::Decoder;
984 #[cfg(feature = "std")]
985 use crate::StreamBuf;
986
987 #[test]
988 fn invalid_input_rejected() {
989 const BIT_LEN: u8 = 2;
990 let ref input = [0, 1 << BIT_LEN /* invalid */, 0];
991 let ref mut target = [0u8; 128];
992 let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN);
993
994 encoder.finish();
995 // We require simulation of normality, that is byte-for-byte compression.
996 let result = encoder.encode_bytes(input, target);
997 assert!(if let Err(LzwError::InvalidCode) = result.status {
998 true
999 } else {
1000 false
1001 });
1002 assert_eq!(result.consumed_in, 1);
1003
1004 let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]);
1005 assert!(if let Ok(LzwStatus::Done) = fixed.status {
1006 true
1007 } else {
1008 false
1009 });
1010 assert_eq!(fixed.consumed_in, 2);
1011
1012 // Okay, now test we actually fixed it.
1013 let ref mut compare = [0u8; 4];
1014 let mut todo = &target[..result.consumed_out + fixed.consumed_out];
1015 let mut free = &mut compare[..];
1016 let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN);
1017
1018 // Decode with up to 16 rounds, far too much but inconsequential.
1019 for _ in 0..16 {
1020 if decoder.has_ended() {
1021 break;
1022 }
1023
1024 let result = decoder.decode_bytes(todo, free);
1025 assert!(result.status.is_ok());
1026 todo = &todo[result.consumed_in..];
1027 free = &mut free[result.consumed_out..];
1028 }
1029
1030 let remaining = { free }.len();
1031 let len = compare.len() - remaining;
1032 assert_eq!(todo, &[]);
1033 assert_eq!(compare[..len], [0, 1, 0]);
1034 }
1035
1036 #[test]
1037 #[should_panic]
1038 fn invalid_code_size_low() {
1039 let _ = Encoder::new(BitOrder::Msb, 1);
1040 }
1041
1042 #[test]
1043 #[should_panic]
1044 fn invalid_code_size_high() {
1045 let _ = Encoder::new(BitOrder::Msb, 14);
1046 }
1047
1048 fn make_decoded() -> Vec<u8> {
1049 const FILE: &'static [u8] =
1050 include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock"));
1051 return Vec::from(FILE);
1052 }
1053
1054 #[test]
1055 #[cfg(feature = "std")]
1056 fn into_stream_buffer_no_alloc() {
1057 let encoded = make_decoded();
1058 let mut encoder = Encoder::new(BitOrder::Msb, 8);
1059
1060 let mut output = vec![];
1061 let mut buffer = [0; 512];
1062 let mut istream = encoder.into_stream(&mut output);
1063 istream.set_buffer(&mut buffer[..]);
1064 istream.encode(&encoded[..]).status.unwrap();
1065
1066 match istream.buffer {
1067 Some(StreamBuf::Borrowed(_)) => {}
1068 None => panic!("Decoded without buffer??"),
1069 Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
1070 }
1071 }
1072
1073 #[test]
1074 #[cfg(feature = "std")]
1075 fn into_stream_buffer_small_alloc() {
1076 struct WriteTap<W: std::io::Write>(W);
1077 const BUF_SIZE: usize = 512;
1078
1079 impl<W: std::io::Write> std::io::Write for WriteTap<W> {
1080 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1081 assert!(buf.len() <= BUF_SIZE);
1082 self.0.write(buf)
1083 }
1084 fn flush(&mut self) -> std::io::Result<()> {
1085 self.0.flush()
1086 }
1087 }
1088
1089 let encoded = make_decoded();
1090 let mut encoder = Encoder::new(BitOrder::Msb, 8);
1091
1092 let mut output = vec![];
1093 let mut istream = encoder.into_stream(WriteTap(&mut output));
1094 istream.set_buffer_size(512);
1095 istream.encode(&encoded[..]).status.unwrap();
1096
1097 match istream.buffer {
1098 Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
1099 Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
1100 None => panic!("Decoded without buffer??"),
1101 }
1102 }
1103
1104 #[test]
1105 #[cfg(feature = "std")]
1106 fn reset() {
1107 let encoded = make_decoded();
1108 let mut encoder = Encoder::new(BitOrder::Msb, 8);
1109 let mut reference = None;
1110
1111 for _ in 0..2 {
1112 let mut output = vec![];
1113 let mut buffer = [0; 512];
1114 let mut istream = encoder.into_stream(&mut output);
1115 istream.set_buffer(&mut buffer[..]);
1116 istream.encode_all(&encoded[..]).status.unwrap();
1117
1118 encoder.reset();
1119 if let Some(reference) = &reference {
1120 assert_eq!(output, *reference);
1121 } else {
1122 reference = Some(output);
1123 }
1124 }
1125 }
1126}
1127