encode.rs source code [crates/weezl-0.1.8/src/encode.rs]

1	//! A module for all encoding needs.
2	use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
3	use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
4
5	use crate::alloc::{boxed::Box, vec::Vec};
6	#[cfg(feature = "std")]
7	use crate::error::StreamResult;
8	#[cfg(feature = "std")]
9	use std::io::{self, BufRead, Write};
10
11	/// The state for encoding data with an LZW algorithm.
12	///
13	/// The same structure can be utilized with streams as well as your own buffers and driver logic.
14	/// It may even be possible to mix them if you are sufficiently careful not to lose any written
15	/// data in the process.
16	///
17	/// This is a sans-IO implementation, meaning that it only contains the state of the encoder and
18	/// the caller will provide buffers for input and output data when calling the basic
19	/// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_`*
20	/// methods for enoding with a particular style of common IO.
21	///
22	/// [`encode`] for encoding once without any IO-loop.*
23	/// [`into_async`] for encoding with the `futures` traits for asynchronous IO.*
24	/// [`into_stream`] for encoding with the standard `io` traits.*
25	/// [`into_vec`] for in-memory encoding.*
26	///
27	/// [`encode_bytes`]: #method.encode_bytes
28	/// [`encode`]: #method.encode
29	/// [`into_async`]: #method.into_async
30	/// [`into_stream`]: #method.into_stream
31	/// [`into_vec`]: #method.into_vec
32	pub struct Encoder {
33	/// Internally dispatch via a dynamic trait object. This did not have any significant
34	/// performance impact as we batch data internally and this pointer does not change after
35	/// creation!
36	state: Box<dyn Stateful + Send + 'static>,
37	}
38
39	/// A encoding stream sink.
40	///
41	/// See [`Encoder::into_stream`] on how to create this type.
42	///
43	/// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream
44	#[cfg_attr(
45	not(feature = "std"),
46	deprecated = "This type is only useful with the `std` feature."
47	)]
48	#[cfg_attr(not(feature = "std"), allow(dead_code))]
49	pub struct IntoStream<'d, W> {
50	encoder: &'d mut Encoder,
51	writer: W,
52	buffer: Option<StreamBuf<'d>>,
53	default_size: usize,
54	}
55
56	/// An async decoding sink.
57	///
58	/// See [`Encoder::into_async`] on how to create this type.
59	///
60	/// [`Encoder::into_async`]: struct.Encoder.html#method.into_async
61	#[cfg(feature = "async")]
62	pub struct IntoAsync<'d, W> {
63	encoder: &'d mut Encoder,
64	writer: W,
65	buffer: Option<StreamBuf<'d>>,
66	default_size: usize,
67	}
68
69	/// A encoding sink into a vector.
70	///
71	/// See [`Encoder::into_vec`] on how to create this type.
72	///
73	/// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec
74	pub struct IntoVec<'d> {
75	encoder: &'d mut Encoder,
76	vector: &'d mut Vec<u8>,
77	}
78
79	trait Stateful {
80	fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
81	fn mark_ended(&mut self) -> bool;
82	/// Reset the state tracking if end code has been written.
83	fn restart(&mut self);
84	/// Reset the encoder to the beginning, dropping all buffers etc.
85	fn reset(&mut self);
86	}
87
88	struct EncodeState<B: Buffer> {
89	/// The configured minimal code size.
90	min_size: u8,
91	/// The current encoding symbol tree.
92	tree: Tree,
93	/// If we have pushed the end code.
94	has_ended: bool,
95	/// If tiff then bumps are a single code sooner.
96	is_tiff: bool,
97	/// The code corresponding to the currently read characters.
98	current_code: Code,
99	/// The clear code for resetting the dictionary.
100	clear_code: Code,
101	/// The bit buffer for encoding.
102	buffer: B,
103	}
104
105	struct MsbBuffer {
106	/// The current code length.
107	code_size: u8,
108	/// The buffer bits.
109	buffer: u64,
110	/// The number of valid buffer bits.
111	bits_in_buffer: u8,
112	}
113
114	struct LsbBuffer {
115	/// The current code length.
116	code_size: u8,
117	/// The buffer bits.
118	buffer: u64,
119	/// The number of valid buffer bits.
120	bits_in_buffer: u8,
121	}
122
123	trait Buffer {
124	fn new(size: u8) -> Self;
125	/// Reset the code size in the buffer.
126	fn reset(&mut self, min_size: u8);
127	/// Apply effects of a Clear Code.
128	fn clear(&mut self, min_size: u8);
129	/// Insert a code into the buffer.
130	fn buffer_code(&mut self, code: Code);
131	/// Push bytes if the buffer space is getting small.
132	fn push_out(&mut self, out: &mut &mut [u8]) -> bool;
133	/// Flush all full bytes, returning if at least one more byte remains.
134	fn flush_out(&mut self, out: &mut &mut [u8]) -> bool;
135	/// Pad the buffer to a full byte.
136	fn buffer_pad(&mut self);
137	/// Increase the maximum code size.
138	fn bump_code_size(&mut self);
139	/// Return the maximum code with the current code size.
140	fn max_code(&self) -> Code;
141	/// Return the current code size in bits.
142	fn code_size(&self) -> u8;
143	}
144
145	/// One tree node for at most each code.
146	/// To avoid using too much memory we keep nodes with few successors in optimized form. This form
147	/// doesn't offer lookup by indexing but instead does a linear search.
148	#[derive(Default)]
149	struct Tree {
150	simples: Vec<Simple>,
151	complex: Vec<Full>,
152	keys: Vec<CompressedKey>,
153	}
154
155	#[derive(Clone, Copy)]
156	enum FullKey {
157	NoSuccessor,
158	Simple(u16),
159	Full(u16),
160	}
161
162	#[derive(Clone, Copy)]
163	struct CompressedKey(u16);
164
165	const SHORT: usize = `16`;
166
167	#[derive(Clone, Copy)]
168	struct Simple {
169	codes: [Code; SHORT],
170	chars: [u8; SHORT],
171	count: u8,
172	}
173
174	#[derive(Clone, Copy)]
175	struct Full {
176	char_continuation: [Code; `256`],
177	}
178
179	impl Encoder {
180	/// Create a new encoder with the specified bit order and symbol size.
181	///
182	/// The algorithm for dynamically increasing the code symbol bit width is compatible with the
183	/// original specification. In particular you will need to specify an `Lsb` bit oder to encode
184	/// the data portion of a compressed `gif` image.
185	///
186	/// # Panics
187	///
188	/// The `size` needs to be in the interval `2..=12`.
189	pub fn new(order: BitOrder, size: u8) -> Self {
190	type Boxed = Box<dyn Stateful + Send + 'static>;
191	super::assert_encode_size(size);
192	let state = match order {
193	BitOrder::Lsb => Box::new(EncodeState::<LsbBuffer>::new(size)) as Boxed,
194	BitOrder::Msb => Box::new(EncodeState::<MsbBuffer>::new(size)) as Boxed,
195	};
196
197	Encoder { state }
198	}
199
200	/// Create a TIFF compatible encoder with the specified bit order and symbol size.
201	///
202	/// The algorithm for dynamically increasing the code symbol bit width is compatible with the
203	/// TIFF specification, which is a misinterpretation of the original algorithm for increasing
204	/// the code size. It switches one symbol sooner.
205	///
206	/// # Panics
207	///
208	/// The `size` needs to be in the interval `2..=12`.
209	pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
210	type Boxed = Box<dyn Stateful + Send + 'static>;
211	super::assert_encode_size(size);
212	let state = match order {
213	BitOrder::Lsb => {
214	let mut state = Box::new(EncodeState::<LsbBuffer>::new(size));
215	state.is_tiff = `true`;
216	state as Boxed
217	}
218	BitOrder::Msb => {
219	let mut state = Box::new(EncodeState::<MsbBuffer>::new(size));
220	state.is_tiff = `true`;
221	state as Boxed
222	}
223	};
224
225	Encoder { state }
226	}
227
228	/// Encode some bytes from `inp` into `out`.
229	///
230	/// See [`into_stream`] for high-level functions (this interface is only available with the
231	/// `std` feature) and [`finish`] for marking the input data as complete.
232	///
233	/// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and
234	/// all following ones will _not_ be consumed and the `status` of the result will signal an
235	/// error. The result will also indicate that all bytes up to but not including the offending
236	/// byte have been consumed. You may try again with a fixed byte.
237	///
238	/// [`into_stream`]: #method.into_stream
239	/// [`finish`]: #method.finish
240	pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
241	self.state.advance(inp, out)
242	}
243
244	/// Encode a single chunk of data.
245	///
246	/// This method will add an end marker to the encoded chunk.
247	///
248	/// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize
249	/// buffer size, to supply an existing vector, to control whether an end marker is required, or
250	/// to preserve partial data in the case of a decoding error.
251	///
252	/// [`into_vec`]: #into_vec
253	///
254	/// # Example
255	///
256	/// ```
257	/// use weezl::{BitOrder, encode::Encoder};
258	///
259	/// let data = b"Hello, world";
260	/// let encoded = Encoder::new(BitOrder::Msb, `9`)
261	/// .encode(data)
262	/// .expect("All bytes valid for code size");
263	/// ```
264	pub fn encode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
265	let mut output = Vec::new();
266	self.into_vec(&mut output).encode_all(data).status?;
267	Ok(output)
268	}
269
270	/// Construct a encoder into a writer.
271	#[cfg(feature = "std")]
272	pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
273	IntoStream {
274	encoder: self,
275	writer,
276	buffer: None,
277	default_size: STREAM_BUF_SIZE,
278	}
279	}
280
281	/// Construct a encoder into an async writer.
282	#[cfg(feature = "async")]
283	pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
284	IntoAsync {
285	encoder: self,
286	writer,
287	buffer: None,
288	default_size: STREAM_BUF_SIZE,
289	}
290	}
291
292	/// Construct an encoder into a vector.
293	///
294	/// All encoded data is appended and the vector is __not__ cleared.
295	///
296	/// Compared to `into_stream` this interface allows a high-level access to encoding without
297	/// requires the `std`-feature. Also, it can make full use of the extra buffer control that the
298	/// special target exposes.
299	pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
300	IntoVec {
301	encoder: self,
302	vector: vec,
303	}
304	}
305
306	/// Mark the encoding as in the process of finishing.
307	///
308	/// The next following call to `encode_bytes` which is able to consume the complete input will
309	/// also try to emit an end code. It's not recommended, but also not unsound, to use different
310	/// byte slices in different calls from this point forward and thus to 'delay' the actual end
311	/// of the data stream. The behaviour after the end marker has been written is unspecified but
312	/// sound.
313	pub fn finish(&mut self) {
314	self.state.mark_ended();
315	}
316
317	/// Undo marking this data stream as ending.
318	/// FIXME: clarify how this interacts with padding introduced after end code.
319	#[allow(dead_code)]
320	pub(crate) fn restart(&mut self) {
321	self.state.restart()
322	}
323
324	/// Reset all internal state.
325	///
326	/// This produce an encoder as if just constructed with `new` but taking slightly less work. In
327	/// particular it will not deallocate any internal allocations. It will also avoid some
328	/// duplicate setup work.
329	pub fn reset(&mut self) {
330	self.state.reset()
331	}
332	}
333
334	#[cfg(feature = "std")]
335	impl<'d, W: Write> IntoStream<'d, W> {
336	/// Encode data from a reader.
337	///
338	/// This will drain the supplied reader. It will not encode an end marker after all data has
339	/// been processed.
340	pub fn encode(&mut self, read: impl BufRead) -> StreamResult {
341	self.encode_part(read, `false`)
342	}
343
344	/// Encode data from a reader and an end marker.
345	pub fn encode_all(mut self, read: impl BufRead) -> StreamResult {
346	self.encode_part(read, `true`)
347	}
348
349	/// Set the size of the intermediate encode buffer.
350	///
351	/// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is
352	/// available and any encoding method is called. No buffer is allocated if `set_buffer` has
353	/// been called. The buffer is reused.
354	///
355	/// # Panics
356	/// This method panics if `size` is `0`.
357	pub fn set_buffer_size(&mut self, size: usize) {
358	assert_ne!(size, `0`, "Attempted to set empty buffer");
359	self.default_size = size;
360	}
361
362	/// Use a particular buffer as an intermediate encode buffer.
363	///
364	/// Calling this sets or replaces the buffer. When a buffer has been set then it is used
365	/// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant
366	/// for efficient encoding as there is additional overhead from `write` calls each time the
367	/// buffer has been filled.
368	///
369	/// # Panics
370	/// This method panics if the `buffer` is empty.
371	pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
372	assert_ne!(buffer.len(), `0`, "Attempted to set empty buffer");
373	self.buffer = Some(StreamBuf::Borrowed(buffer));
374	}
375
376	fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult {
377	let IntoStream {
378	encoder,
379	writer,
380	buffer,
381	default_size,
382	} = self;
383	enum Progress {
384	Ok,
385	Done,
386	}
387
388	let mut bytes_read = `0`;
389	let mut bytes_written = `0`;
390
391	let read_bytes = &mut bytes_read;
392	let write_bytes = &mut bytes_written;
393
394	let outbuf: &mut [u8] =
395	match { buffer.get_or_insert_with(\|\| StreamBuf::Owned(vec![`0u8`; *default_size])) } {
396	StreamBuf::Borrowed(slice) => &mut *slice,
397	StreamBuf::Owned(vec) => &mut *vec,
398	};
399	assert!(!outbuf.is_empty());
400
401	let once = move \|\| {
402	let data = read.fill_buf()?;
403
404	if data.is_empty() {
405	if finish {
406	encoder.finish();
407	} else {
408	return Ok(Progress::Done);
409	}
410	}
411
412	let result = encoder.encode_bytes(data, &mut outbuf[..]);
413	*read_bytes += result.consumed_in;
414	*write_bytes += result.consumed_out;
415	read.consume(result.consumed_in);
416
417	let done = result.status.map_err(\|err\| {
418	io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
419	})?;
420
421	if let LzwStatus::Done = done {
422	writer.write_all(&outbuf[..result.consumed_out])?;
423	return Ok(Progress::Done);
424	}
425
426	if let LzwStatus::NoProgress = done {
427	return Err(io::Error::new(
428	io::ErrorKind::UnexpectedEof,
429	"No more data but no end marker detected",
430	));
431	}
432
433	writer.write_all(&outbuf[..result.consumed_out])?;
434	Ok(Progress::Ok)
435	};
436
437	let status = core::iter::repeat_with(once)
438	// scan+fuse can be replaced with map_while
439	.scan((), \|(), result\| match result {
440	Ok(Progress::Ok) => Some(Ok(())),
441	Err(err) => Some(Err(err)),
442	Ok(Progress::Done) => None,
443	})
444	.fuse()
445	.collect();
446
447	StreamResult {
448	bytes_read,
449	bytes_written,
450	status,
451	}
452	}
453	}
454
455	impl IntoVec<'_> {
456	/// Encode data from a slice.
457	pub fn encode(&mut self, read: &[u8]) -> VectorResult {
458	self.encode_part(read, `false`)
459	}
460
461	/// Decode data from a reader, adding an end marker.
462	pub fn encode_all(mut self, read: &[u8]) -> VectorResult {
463	self.encode_part(read, `true`)
464	}
465
466	fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) {
467	const CHUNK_SIZE: usize = `1` << `12`;
468	let decoder = &mut self.encoder;
469	let length = self.vector.len();
470
471	// Use the vector to do overflow checks and w/e.
472	self.vector.reserve(CHUNK_SIZE);
473	// FIXME: encoding into uninit buffer?
474	self.vector.resize(length + CHUNK_SIZE, `0u8`);
475
476	(&mut self.vector[length..], decoder)
477	}
478
479	fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult {
480	let mut result = VectorResult {
481	consumed_in: `0`,
482	consumed_out: `0`,
483	status: Ok(LzwStatus::Ok),
484	};
485
486	enum Progress {
487	Ok,
488	Done,
489	}
490
491	// Converting to mutable refs to move into the `once` closure.
492	let read_bytes = &mut result.consumed_in;
493	let write_bytes = &mut result.consumed_out;
494	let mut data = part;
495
496	// A 64 MB buffer is quite large but should get alloc_zeroed.
497	// Note that the decoded size can be up to quadratic in code block.
498	let once = move \|\| {
499	// Grab a new output buffer.
500	let (outbuf, encoder) = self.grab_buffer();
501
502	if finish {
503	encoder.finish();
504	}
505
506	// Decode as much of the buffer as fits.
507	let result = encoder.encode_bytes(data, &mut outbuf[..]);
508	// Do the bookkeeping and consume the buffer.
509	*read_bytes += result.consumed_in;
510	*write_bytes += result.consumed_out;
511	data = &data[result.consumed_in..];
512
513	let unfilled = outbuf.len() - result.consumed_out;
514	let filled = self.vector.len() - unfilled;
515	self.vector.truncate(filled);
516
517	// Handle the status in the result.
518	let done = result.status?;
519	if let LzwStatus::Done = done {
520	Ok(Progress::Done)
521	} else {
522	Ok(Progress::Ok)
523	}
524	};
525
526	// Decode chunks of input data until we're done.
527	let status: Result<(), _> = core::iter::repeat_with(once)
528	// scan+fuse can be replaced with map_while
529	.scan((), \|(), result\| match result {
530	Ok(Progress::Ok) => Some(Ok(())),
531	Err(err) => Some(Err(err)),
532	Ok(Progress::Done) => None,
533	})
534	.fuse()
535	.collect();
536
537	if let Err(err) = status {
538	result.status = Err(err);
539	}
540
541	result
542	}
543	}
544
545	// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
546	// trip over the usage of await, which is a reserved keyword in that edition/version. It only
547	// contains an impl block.
548	#[cfg(feature = "async")]
549	#[path = "encode_into_async.rs"]
550	mod impl_encode_into_async;
551
552	impl<B: Buffer> EncodeState<B> {
553	fn new(min_size: u8) -> Self {
554	let clear_code: u16 = `1` << min_size;
555	let mut tree: Tree = Tree::default();
556	tree.init(min_size);
557	let mut state: EncodeState = EncodeState {
558	min_size,
559	tree,
560	has_ended: `false`,
561	is_tiff: `false`,
562	current_code: clear_code,
563	clear_code,
564	buffer: B::new(min_size),
565	};
566	state.buffer_code(clear_code);
567	state
568	}
569	}
570
571	impl<B: Buffer> Stateful for EncodeState<B> {
572	fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
573	let c_in = inp.len();
574	let c_out = out.len();
575	let mut status = Ok(LzwStatus::Ok);
576
577	'encoding: loop {
578	if self.push_out(&mut out) {
579	break;
580	}
581
582	if inp.is_empty() && self.has_ended {
583	let end = self.end_code();
584	if self.current_code != end {
585	if self.current_code != self.clear_code {
586	self.buffer_code(self.current_code);
587
588	// When reading this code, the decoder will add an extra entry to its table
589	// before reading th end code. Thusly, it may increase its code size based
590	// on this additional entry.
591	if self.tree.keys.len() + usize::from(self.is_tiff)
592	> usize::from(self.buffer.max_code())
593	&& self.buffer.code_size() < MAX_CODESIZE
594	{
595	self.buffer.bump_code_size();
596	}
597	}
598	self.buffer_code(end);
599	self.current_code = end;
600	self.buffer_pad();
601	}
602
603	break;
604	}
605
606	let mut next_code = None;
607	let mut bytes = inp.iter();
608	while let Some(&byte) = bytes.next() {
609	if self.min_size < `8` && byte >= `1` << self.min_size {
610	status = Err(LzwError::InvalidCode);
611	break 'encoding;
612	}
613
614	inp = bytes.as_slice();
615	match self.tree.iterate(self.current_code, byte) {
616	Ok(code) => self.current_code = code,
617	Err(_) => {
618	next_code = Some(self.current_code);
619
620	self.current_code = u16::from(byte);
621	break;
622	}
623	}
624	}
625
626	match next_code {
627	// No more bytes, no code produced.
628	None => break,
629	Some(code) => {
630	self.buffer_code(code);
631
632	if self.tree.keys.len() + usize::from(self.is_tiff)
633	> usize::from(self.buffer.max_code()) + `1`
634	&& self.buffer.code_size() < MAX_CODESIZE
635	{
636	self.buffer.bump_code_size();
637	}
638
639	if self.tree.keys.len() > MAX_ENTRIES {
640	self.buffer_code(self.clear_code);
641	self.tree.reset(self.min_size);
642	self.buffer.clear(self.min_size);
643	}
644	}
645	}
646	}
647
648	if inp.is_empty() && self.current_code == self.end_code() {
649	if !self.flush_out(&mut out) {
650	status = Ok(LzwStatus::Done);
651	}
652	}
653
654	BufferResult {
655	consumed_in: c_in - inp.len(),
656	consumed_out: c_out - out.len(),
657	status,
658	}
659	}
660
661	fn mark_ended(&mut self) -> bool {
662	core::mem::replace(&mut self.has_ended, `true`)
663	}
664
665	fn restart(&mut self) {
666	self.has_ended = `false`;
667	}
668
669	fn reset(&mut self) {
670	self.restart();
671	self.current_code = self.clear_code;
672	self.tree.reset(self.min_size);
673	self.buffer.reset(self.min_size);
674	self.buffer_code(self.clear_code);
675	}
676	}
677
678	impl<B: Buffer> EncodeState<B> {
679	fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
680	self.buffer.push_out(out)
681	}
682
683	fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
684	self.buffer.flush_out(out)
685	}
686
687	fn end_code(&self) -> Code {
688	self.clear_code + `1`
689	}
690
691	fn buffer_pad(&mut self) {
692	self.buffer.buffer_pad();
693	}
694
695	fn buffer_code(&mut self, code: Code) {
696	self.buffer.buffer_code(code);
697	}
698	}
699
700	impl Buffer for MsbBuffer {
701	fn new(min_size: u8) -> Self {
702	MsbBuffer {
703	code_size: min_size + `1`,
704	buffer: `0`,
705	bits_in_buffer: `0`,
706	}
707	}
708
709	fn reset(&mut self, min_size: u8) {
710	self.code_size = min_size + `1`;
711	self.buffer = `0`;
712	self.bits_in_buffer = `0`;
713	}
714
715	fn clear(&mut self, min_size: u8) {
716	self.code_size = min_size + `1`;
717	}
718
719	fn buffer_code(&mut self, code: Code) {
720	let shift = `64` - self.bits_in_buffer - self.code_size;
721	self.buffer \|= u64::from(code) << shift;
722	self.bits_in_buffer += self.code_size;
723	}
724
725	fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
726	if self.bits_in_buffer + `2` * self.code_size < `64` {
727	return `false`;
728	}
729
730	self.flush_out(out)
731	}
732
733	fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
734	let want = usize::from(self.bits_in_buffer / `8`);
735	let count = want.min((*out).len());
736	let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
737	*out = tail;
738
739	for b in bytes {
740	*b = ((self.buffer & `0xff00_0000_0000_0000`) >> `56`) as u8;
741	self.buffer <<= `8`;
742	self.bits_in_buffer -= `8`;
743	}
744
745	count < want
746	}
747
748	fn buffer_pad(&mut self) {
749	let to_byte = self.bits_in_buffer.wrapping_neg() & `0x7`;
750	self.bits_in_buffer += to_byte;
751	}
752
753	fn bump_code_size(&mut self) {
754	self.code_size += `1`;
755	}
756
757	fn max_code(&self) -> Code {
758	(`1` << self.code_size) - `1`
759	}
760
761	fn code_size(&self) -> u8 {
762	self.code_size
763	}
764	}
765
766	impl Buffer for LsbBuffer {
767	fn new(min_size: u8) -> Self {
768	LsbBuffer {
769	code_size: min_size + `1`,
770	buffer: `0`,
771	bits_in_buffer: `0`,
772	}
773	}
774
775	fn reset(&mut self, min_size: u8) {
776	self.code_size = min_size + `1`;
777	self.buffer = `0`;
778	self.bits_in_buffer = `0`;
779	}
780
781	fn clear(&mut self, min_size: u8) {
782	self.code_size = min_size + `1`;
783	}
784
785	fn buffer_code(&mut self, code: Code) {
786	self.buffer \|= u64::from(code) << self.bits_in_buffer;
787	self.bits_in_buffer += self.code_size;
788	}
789
790	fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
791	if self.bits_in_buffer + `2` * self.code_size < `64` {
792	return `false`;
793	}
794
795	self.flush_out(out)
796	}
797
798	fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
799	let want = usize::from(self.bits_in_buffer / `8`);
800	let count = want.min((*out).len());
801	let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
802	*out = tail;
803
804	for b in bytes {
805	*b = (self.buffer & `0x0000_0000_0000_00ff`) as u8;
806	self.buffer >>= `8`;
807	self.bits_in_buffer -= `8`;
808	}
809
810	count < want
811	}
812
813	fn buffer_pad(&mut self) {
814	let to_byte = self.bits_in_buffer.wrapping_neg() & `0x7`;
815	self.bits_in_buffer += to_byte;
816	}
817
818	fn bump_code_size(&mut self) {
819	self.code_size += `1`;
820	}
821
822	fn max_code(&self) -> Code {
823	(`1` << self.code_size) - `1`
824	}
825
826	fn code_size(&self) -> u8 {
827	self.code_size
828	}
829	}
830
831	impl Tree {
832	fn init(&mut self, min_size: u8) {
833	// We need a way to represent the state of a currently empty buffer. We use the clear code
834	// for this, thus create one complex mapping that leads to the one-char base codes.
835	self.keys
836	.resize((`1` << min_size) + `2`, FullKey::NoSuccessor.into());
837	self.complex.push(Full {
838	char_continuation: [`0`; `256`],
839	});
840	let map_of_begin = self.complex.last_mut().unwrap();
841	for ch in `0u16`..`256` {
842	map_of_begin.char_continuation[usize::from(ch)] = ch;
843	}
844	self.keys[`1` << min_size] = FullKey::Full(`0`).into();
845	}
846
847	fn reset(&mut self, min_size: u8) {
848	self.simples.clear();
849	self.keys.truncate((`1` << min_size) + `2`);
850	// Keep entry for clear code.
851	self.complex.truncate(`1`);
852	// The first complex is not changed..
853	for k in self.keys[..(`1` << min_size) + `2`].iter_mut() {
854	*k = FullKey::NoSuccessor.into();
855	}
856	self.keys[`1` << min_size] = FullKey::Full(`0`).into();
857	}
858
859	fn at_key(&self, code: Code, ch: u8) -> Option<Code> {
860	let key = self.keys[usize::from(code)];
861	match FullKey::from(key) {
862	FullKey::NoSuccessor => None,
863	FullKey::Simple(idx) => {
864	let nexts = &self.simples[usize::from(idx)];
865	let successors = nexts
866	.codes
867	.iter()
868	.zip(nexts.chars.iter())
869	.take(usize::from(nexts.count));
870	for (&scode, &sch) in successors {
871	if sch == ch {
872	return Some(scode);
873	}
874	}
875
876	None
877	}
878	FullKey::Full(idx) => {
879	let full = &self.complex[usize::from(idx)];
880	let precode = full.char_continuation[usize::from(ch)];
881	if usize::from(precode) < MAX_ENTRIES {
882	Some(precode)
883	} else {
884	None
885	}
886	}
887	}
888	}
889
890	/// Iterate to the next char.
891	/// Return Ok when it was already in the tree or creates a new entry for it and returns Err.
892	fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> {
893	if let Some(next) = self.at_key(code, ch) {
894	Ok(next)
895	} else {
896	Err(self.append(code, ch))
897	}
898	}
899
900	fn append(&mut self, code: Code, ch: u8) -> Code {
901	let next: Code = self.keys.len() as u16;
902	let key = self.keys[usize::from(code)];
903	// TODO: with debug assertions, check for non-existence
904	match FullKey::from(key) {
905	FullKey::NoSuccessor => {
906	let new_key = FullKey::Simple(self.simples.len() as u16);
907	self.simples.push(Simple::default());
908	let simples = self.simples.last_mut().unwrap();
909	simples.codes[`0`] = next;
910	simples.chars[`0`] = ch;
911	simples.count = `1`;
912	self.keys[usize::from(code)] = new_key.into();
913	}
914	FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => {
915	let nexts = &mut self.simples[usize::from(idx)];
916	let nidx = usize::from(nexts.count);
917	nexts.chars[nidx] = ch;
918	nexts.codes[nidx] = next;
919	nexts.count += `1`;
920	}
921	FullKey::Simple(idx) => {
922	let new_key = FullKey::Full(self.complex.len() as u16);
923	let simples = &self.simples[usize::from(idx)];
924	self.complex.push(Full {
925	char_continuation: [Code::max_value(); `256`],
926	});
927	let full = self.complex.last_mut().unwrap();
928	for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) {
929	full.char_continuation[usize::from(pch)] = pcont;
930	}
931	self.keys[usize::from(code)] = new_key.into();
932	}
933	FullKey::Full(idx) => {
934	let full = &mut self.complex[usize::from(idx)];
935	full.char_continuation[usize::from(ch)] = next;
936	}
937	}
938	self.keys.push(FullKey::NoSuccessor.into());
939	next
940	}
941	}
942
943	impl Default for FullKey {
944	fn default() -> Self {
945	FullKey::NoSuccessor
946	}
947	}
948
949	impl Default for Simple {
950	fn default() -> Self {
951	Simple {
952	codes: [`0`; SHORT],
953	chars: [`0`; SHORT],
954	count: `0`,
955	}
956	}
957	}
958
959	impl From<CompressedKey> for FullKey {
960	fn from(CompressedKey(key: u16): CompressedKey) -> Self {
961	match (key >> MAX_CODESIZE) & `0xf` {
962	`0` => FullKey::Full(key & `0xfff`),
963	`1` => FullKey::Simple(key & `0xfff`),
964	_ => FullKey::NoSuccessor,
965	}
966	}
967	}
968
969	impl From<FullKey> for CompressedKey {
970	fn from(full: FullKey) -> Self {
971	CompressedKey(match full {
972	FullKey::NoSuccessor => `0x2000`,
973	FullKey::Simple(code: u16) => `0x1000` \| code,
974	FullKey::Full(code: u16) => code,
975	})
976	}
977	}
978
979	#[cfg(test)]
980	mod tests {
981	use super::{BitOrder, Encoder, LzwError, LzwStatus};
982	use crate::alloc::vec::Vec;
983	use crate::decode::Decoder;
984	#[cfg(feature = "std")]
985	use crate::StreamBuf;
986
987	#[test]
988	fn invalid_input_rejected() {
989	const BIT_LEN: u8 = `2`;
990	let ref input = [`0`, `1` << BIT_LEN / invalid /, `0`];
991	let ref mut target = [`0u8`; `128`];
992	let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN);
993
994	encoder.finish();
995	// We require simulation of normality, that is byte-for-byte compression.
996	let result = encoder.encode_bytes(input, target);
997	assert!(if let Err(LzwError::InvalidCode) = result.status {
998	`true`
999	} else {
1000	`false`
1001	});
1002	assert_eq!(result.consumed_in, `1`);
1003
1004	let fixed = encoder.encode_bytes(&[`1`, `0`], &mut target[result.consumed_out..]);
1005	assert!(if let Ok(LzwStatus::Done) = fixed.status {
1006	`true`
1007	} else {
1008	`false`
1009	});
1010	assert_eq!(fixed.consumed_in, `2`);
1011
1012	// Okay, now test we actually fixed it.
1013	let ref mut compare = [`0u8`; `4`];
1014	let mut todo = &target[..result.consumed_out + fixed.consumed_out];
1015	let mut free = &mut compare[..];
1016	let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN);
1017
1018	// Decode with up to 16 rounds, far too much but inconsequential.
1019	for _ in `0`..`16` {
1020	if decoder.has_ended() {
1021	break;
1022	}
1023
1024	let result = decoder.decode_bytes(todo, free);
1025	assert!(result.status.is_ok());
1026	todo = &todo[result.consumed_in..];
1027	free = &mut free[result.consumed_out..];
1028	}
1029
1030	let remaining = { free }.len();
1031	let len = compare.len() - remaining;
1032	assert_eq!(todo, &[]);
1033	assert_eq!(compare[..len], [`0`, `1`, `0`]);
1034	}
1035
1036	#[test]
1037	#[should_panic]
1038	fn invalid_code_size_low() {
1039	let _ = Encoder::new(BitOrder::Msb, `1`);
1040	}
1041
1042	#[test]
1043	#[should_panic]
1044	fn invalid_code_size_high() {
1045	let _ = Encoder::new(BitOrder::Msb, `14`);
1046	}
1047
1048	fn make_decoded() -> Vec<u8> {
1049	const FILE: &'static [u8] =
1050	include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock"));
1051	return Vec::from(FILE);
1052	}
1053
1054	#[test]
1055	#[cfg(feature = "std")]
1056	fn into_stream_buffer_no_alloc() {
1057	let encoded = make_decoded();
1058	let mut encoder = Encoder::new(BitOrder::Msb, `8`);
1059
1060	let mut output = vec![];
1061	let mut buffer = [`0`; `512`];
1062	let mut istream = encoder.into_stream(&mut output);
1063	istream.set_buffer(&mut buffer[..]);
1064	istream.encode(&encoded[..]).status.unwrap();
1065
1066	match istream.buffer {
1067	Some(StreamBuf::Borrowed(_)) => {}
1068	None => panic!("Decoded without buffer??"),
1069	Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
1070	}
1071	}
1072
1073	#[test]
1074	#[cfg(feature = "std")]
1075	fn into_stream_buffer_small_alloc() {
1076	struct WriteTap<W: std::io::Write>(W);
1077	const BUF_SIZE: usize = `512`;
1078
1079	impl<W: std::io::Write> std::io::Write for WriteTap<W> {
1080	fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1081	assert!(buf.len() <= BUF_SIZE);
1082	self.0.write(buf)
1083	}
1084	fn flush(&mut self) -> std::io::Result<()> {
1085	self.0.flush()
1086	}
1087	}
1088
1089	let encoded = make_decoded();
1090	let mut encoder = Encoder::new(BitOrder::Msb, `8`);
1091
1092	let mut output = vec![];
1093	let mut istream = encoder.into_stream(WriteTap(&mut output));
1094	istream.set_buffer_size(`512`);
1095	istream.encode(&encoded[..]).status.unwrap();
1096
1097	match istream.buffer {
1098	Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
1099	Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
1100	None => panic!("Decoded without buffer??"),
1101	}
1102	}
1103
1104	#[test]
1105	#[cfg(feature = "std")]
1106	fn reset() {
1107	let encoded = make_decoded();
1108	let mut encoder = Encoder::new(BitOrder::Msb, `8`);
1109	let mut reference = None;
1110
1111	for _ in `0`..`2` {
1112	let mut output = vec![];
1113	let mut buffer = [`0`; `512`];
1114	let mut istream = encoder.into_stream(&mut output);
1115	istream.set_buffer(&mut buffer[..]);
1116	istream.encode_all(&encoded[..]).status.unwrap();
1117
1118	encoder.reset();
1119	if let Some(reference) = &reference {
1120	assert_eq!(output, *reference);
1121	} else {
1122	reference = Some(output);
1123	}
1124	}
1125	}
1126	}
1127