| 1 | //! # LZW decoder and encoder |
| 2 | //! |
//! This crate provides an `Encoder` and a `Decoder` in their respective modules. The code words
//! are read from and written to byte slices (or streams), where it is possible to process either
//! the most or least significant bits first. The maximum possible code size is 12 bits, the
//! smallest available code size is 2 bits.
| 7 | //! |
| 8 | //! ## Example |
| 9 | //! |
| 10 | //! These two code blocks show the compression and corresponding decompression. Note that you must |
| 11 | //! use the same arguments to `Encoder` and `Decoder`, otherwise the decoding might fail or produce |
| 12 | //! bad results. |
| 13 | //! |
| 14 | #![cfg_attr (feature = "std" , doc = "```" )] |
| 15 | #![cfg_attr (not(feature = "std" ), doc = "```ignore" )] |
| 16 | //! use weezl::{BitOrder, encode::Encoder}; |
| 17 | //! |
| 18 | //! let data = b"Hello, world" ; |
| 19 | //! let compressed = Encoder::new(BitOrder::Msb, 9) |
| 20 | //! .encode(data) |
| 21 | //! .unwrap(); |
| 22 | //! ``` |
| 23 | //! |
| 24 | #![cfg_attr (feature = "std" , doc = "```" )] |
| 25 | #![cfg_attr (not(feature = "std" ), doc = "```ignore" )] |
| 26 | //! use weezl::{BitOrder, decode::Decoder}; |
//! # let compressed = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10".to_vec();
| 28 | //! # let data = b"Hello, world" ; |
| 29 | //! |
| 30 | //! let decompressed = Decoder::new(BitOrder::Msb, 9) |
| 31 | //! .decode(&compressed) |
| 32 | //! .unwrap(); |
| 33 | //! assert_eq!(decompressed, data); |
| 34 | //! ``` |
| 35 | //! |
| 36 | //! ## LZW Details |
| 37 | //! |
| 38 | //! The de- and encoder expect the LZW stream to start with a clear code and end with an |
| 39 | //! end code which are defined as follows: |
| 40 | //! |
| 41 | //! * `CLEAR_CODE == 1 << min_code_size` |
| 42 | //! * `END_CODE == CLEAR_CODE + 1` |
| 43 | //! |
| 44 | //! For optimal performance, all buffers and input and output slices should be as large as possible |
| 45 | //! and at least 2048 bytes long. This extends to input streams which should have similarly sized |
| 46 | //! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be |
| 47 | //! precise). Since there are no ways to handle allocation errors it is not recommended to operate |
| 48 | //! it on 16-bit targets. |
| 49 | //! |
| 50 | //! ## Allocations and standard library |
| 51 | //! |
| 52 | //! The main algorithm can be used in `no_std` as well, although it requires an allocator. This |
| 53 | //! restriction might be lifted at a later stage. For this you should deactivate the `std` feature. |
//! The main interfaces stay intact but the `into_stream` combinator is not available.
| 55 | #![cfg_attr (not(feature = "std" ), no_std)] |
| 56 | #![forbid (unsafe_code)] |
| 57 | #![forbid (missing_docs)] |
| 58 | |
| 59 | #[cfg (all(feature = "alloc" , not(feature = "std" )))] |
| 60 | extern crate alloc; |
| 61 | #[cfg (all(feature = "alloc" , feature = "std" ))] |
| 62 | use std as alloc; |
| 63 | |
/// Widest code, in bits, that this crate works with.
pub(crate) const MAX_CODESIZE: u8 = 12;
/// `2^MAX_CODESIZE`: how many distinct code values exist at the widest code size.
pub(crate) const MAX_ENTRIES: usize = 1 << (MAX_CODESIZE as usize);

/// Integer type holding a single LZW code point.
pub(crate) type Code = u16;
| 69 | |
/// Default length, in bytes, of the buffer used by the stream encoder/decoder (16 MiB).
///
/// This is deliberately larger than the usual default buffer size (about 4K): a single code word
/// can expand to several bytes, so expanding one input buffer can yield multiple output buffers
/// and would force a costly break in the decoding loop. The decoded size can be up to quadratic
/// in the size of the code block.
pub(crate) const STREAM_BUF_SIZE: usize = 16 * 1024 * 1024;
| 76 | |
/// Selects the order in which the bits of each byte are processed.
#[derive(Debug, Clone, Copy)]
pub enum BitOrder {
    /// Process the most significant bit of a byte first.
    Msb,
    /// Process the least significant bit of a byte first.
    Lsb,
}
| 85 | |
/// Byte buffer for stream operations that is either borrowed or owned.
#[cfg(feature = "alloc")]
pub(crate) enum StreamBuf<'d> {
    /// A mutable byte slice borrowed for the lifetime `'d`.
    Borrowed(&'d mut [u8]),
    /// A byte vector owned by this value.
    Owned(crate::alloc::vec::Vec<u8>),
}
| 92 | |
| 93 | #[cold ] |
| 94 | fn assert_decode_size(size: u8) { |
| 95 | assert!( |
| 96 | size <= MAX_CODESIZE, |
| 97 | "Maximum code size 12 required, got {}" , |
| 98 | size |
| 99 | ); |
| 100 | } |
| 101 | |
| 102 | #[cold ] |
| 103 | fn assert_encode_size(size: u8) { |
| 104 | assert!(size >= 2, "Minimum code size 2 required, got {}" , size); |
| 105 | assert!( |
| 106 | size <= MAX_CODESIZE, |
| 107 | "Maximum code size 12 required, got {}" , |
| 108 | size |
| 109 | ); |
| 110 | } |
| 111 | |
| 112 | #[cfg (feature = "alloc" )] |
| 113 | pub mod decode; |
| 114 | #[cfg (feature = "alloc" )] |
| 115 | pub mod encode; |
| 116 | mod error; |
| 117 | |
| 118 | #[cfg (feature = "std" )] |
| 119 | pub use self::error::StreamResult; |
| 120 | pub use self::error::{BufferResult, LzwError, LzwStatus}; |
| 121 | |
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use crate::{decode::Decoder, encode::Encoder};

    #[cfg(feature = "std")]
    use crate::{decode, encode};

    // Compile-time checks only: the test passes as soon as the bounds below type-check.
    #[test]
    fn stable_send() {
        fn require_send<T: Send + 'static>() {}
        require_send::<Decoder>();
        require_send::<Encoder>();

        #[cfg(feature = "std")]
        fn _require_send_for<'lt, T: Send + 'lt>() {}

        // `W: Send + 'd` must be enough to conclude `IntoStream<'d, W>: Send + 'd`.
        #[cfg(feature = "std")]
        fn _stream_adapters_are_send<'d, W: std::io::Write + Send + 'd>() {
            _require_send_for::<'d, decode::IntoStream<'d, W>>();
            _require_send_for::<'d, encode::IntoStream<'d, W>>();
        }
    }
}
| 147 | |