1 | //! Raw in-memory LZMA streams. |
2 | //! |
3 | //! The `Stream` type exported by this module is the primary type which performs |
4 | //! encoding/decoding of LZMA streams. Each `Stream` is either an encoder or |
5 | //! decoder and processes data in a streaming fashion. |
6 | |
7 | use std::collections::LinkedList; |
8 | use std::error; |
9 | use std::fmt; |
10 | use std::io; |
11 | use std::mem; |
12 | use std::slice; |
13 | |
14 | use lzma_sys; |
15 | |
/// Representation of an in-memory LZMA encoding or decoding stream.
///
/// Wraps the raw underlying `lzma_stream` type and provides the ability to
/// create streams which can either decode or encode various LZMA-based formats.
///
/// The underlying stream is released with `lzma_end` when the value is
/// dropped.
pub struct Stream {
    // Raw liblzma stream state; its input/output buffer pointers are
    // overwritten on every call to `process`.
    raw: lzma_sys::lzma_stream,
}
23 | |
// `lzma_stream` stores raw buffer pointers (`next_in` / `next_out`), so the
// auto traits are not derived and have to be asserted by hand.
// NOTE(review): soundness presumably relies on liblzma streams having no
// thread affinity and on the `&self` methods only reading state — confirm.
unsafe impl Send for Stream {}
unsafe impl Sync for Stream {}
26 | |
/// Options that can be used to configure how LZMA encoding happens.
///
/// This builder is borrowed by a number of other methods, such as
/// `Stream::new_lzma_encoder`, `Filters::lzma1` and `Filters::lzma2`.
pub struct LzmaOptions {
    // Raw liblzma options structure that the setter methods write into.
    raw: lzma_sys::lzma_options_lzma,
}
33 | |
/// Builder to create a multi-threaded stream encoder.
pub struct MtStreamBuilder {
    // Raw liblzma multithreading options handed to `lzma_stream_encoder_mt`.
    raw: lzma_sys::lzma_mt,
    // Owns the custom filter chain (if one was configured); `raw.filters`
    // holds a pointer into this chain, so it must live as long as `raw`.
    filters: Option<Filters>,
}
39 | |
/// A custom chain of filters to configure an encoding stream.
pub struct Filters {
    // Filter array handed to liblzma. It always ends with an
    // `LZMA_VLI_UNKNOWN` entry because `push` inserts new filters just before
    // the last element.
    inner: Vec<lzma_sys::lzma_filter>,
    // Backing storage for the LZMA1/LZMA2 options that entries of `inner`
    // point to.
    // NOTE(review): presumably a `LinkedList` so that adding more options
    // never moves nodes that are already pointed to — confirm.
    lzma_opts: LinkedList<lzma_sys::lzma_options_lzma>,
}
45 | |
46 | /// The `action` argument for `process`, |
47 | /// |
48 | /// After the first use of SyncFlush, FullFlush, FullBarrier, or Finish, the |
49 | /// same `action' must is used until `process` returns `Status::StreamEnd`. |
50 | /// Also, the amount of input must not be modified by the application until |
51 | /// `process` returns `Status::StreamEnd`. Changing the `action' or modifying |
52 | /// the amount of input will make `process` return `Error::Program`. |
53 | #[derive (Copy, Clone)] |
54 | pub enum Action { |
55 | /// Continue processing |
56 | /// |
57 | /// When encoding, encode as much input as possible. Some internal buffering |
58 | /// will probably be done (depends on the filter chain in use), which causes |
59 | /// latency: the input used won't usually be decodeable from the output of |
60 | /// the same `process` call. |
61 | /// |
62 | /// When decoding, decode as much input as possible and produce as much |
63 | /// output as possible. |
64 | Run = lzma_sys::LZMA_RUN as isize, |
65 | |
66 | /// Make all the input available at output |
67 | /// |
68 | /// Normally the encoder introduces some latency. `SyncFlush` forces all the |
69 | /// buffered data to be available at output without resetting the internal |
70 | /// state of the encoder. This way it is possible to use compressed stream |
71 | /// for example for communication over network. |
72 | /// |
73 | /// Only some filters support `SyncFlush`. Trying to use `SyncFlush` with |
74 | /// filters that don't support it will make `process` return |
75 | /// `Error::Options`. For example, LZMA1 doesn't support `SyncFlush` but |
76 | /// LZMA2 does. |
77 | /// |
78 | /// Using `SyncFlush` very often can dramatically reduce the compression |
79 | /// ratio. With some filters (for example, LZMA2), fine-tuning the |
80 | /// compression options may help mitigate this problem significantly (for |
81 | /// example, match finder with LZMA2). |
82 | /// |
83 | /// Decoders don't support `SyncFlush`. |
84 | SyncFlush = lzma_sys::LZMA_SYNC_FLUSH as isize, |
85 | |
86 | /// Finish encoding of the current block. |
87 | /// |
88 | /// All the input data going to the current block must have been given to |
89 | /// the encoder. Call `process` with `FullFlush` until it returns |
90 | /// `Status::StreamEnd`. Then continue normally with `Run` or finish the |
91 | /// Stream with `Finish`. |
92 | /// |
93 | /// This action is currently supported only by stream encoder and easy |
94 | /// encoder (which uses stream encoder). If there is no unfinished block, no |
95 | /// empty block is created. |
96 | FullFlush = lzma_sys::LZMA_FULL_FLUSH as isize, |
97 | |
98 | /// Finish encoding of the current block. |
99 | /// |
100 | /// This is like `FullFlush` except that this doesn't necessarily wait until |
101 | /// all the input has been made available via the output buffer. That is, |
102 | /// `process` might return `Status::StreamEnd` as soon as all the input has |
103 | /// been consumed. |
104 | /// |
105 | /// `FullBarrier` is useful with a threaded encoder if one wants to split |
106 | /// the .xz Stream into blocks at specific offsets but doesn't care if the |
107 | /// output isn't flushed immediately. Using `FullBarrier` allows keeping the |
108 | /// threads busy while `FullFlush` would make `process` wait until all the |
109 | /// threads have finished until more data could be passed to the encoder. |
110 | /// |
111 | /// With a `Stream` initialized with the single-threaded |
112 | /// `new_stream_encoder` or `new_easy_encoder`, `FullBarrier` is an alias |
113 | /// for `FullFlush`. |
114 | FullBarrier = lzma_sys::LZMA_FULL_BARRIER as isize, |
115 | |
116 | /// Finish the current operation |
117 | /// |
118 | /// All the input data must have been given to the encoder (the last bytes |
119 | /// can still be pending in next_in). Call `process` with `Finish` until it |
120 | /// returns `Status::StreamEnd`. Once `Finish` has been used, the amount of |
121 | /// input must no longer be changed by the application. |
122 | /// |
123 | /// When decoding, using `Finish` is optional unless the concatenated flag |
124 | /// was used when the decoder was initialized. When concatenated was not |
125 | /// used, the only effect of `Finish` is that the amount of input must not |
126 | /// be changed just like in the encoder. |
127 | Finish = lzma_sys::LZMA_FINISH as isize, |
128 | } |
129 | |
/// Return value of a `process` operation.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Status {
    /// Operation completed successfully.
    Ok,

    /// End of stream was reached.
    ///
    /// When encoding, this means that a sync/full flush or `Finish` was
    /// completed. When decoding, this indicates that all data was decoded
    /// successfully.
    StreamEnd,

    /// If the `TELL_ANY_CHECK` flag was specified when constructing a decoder,
    /// this informs that the `check` method will now return the underlying
    /// integrity check algorithm.
    GetCheck,

    /// An error has not been encountered, but no progress is possible.
    ///
    /// Processing can be continued normally by providing more input and/or more
    /// output space, if possible.
    ///
    /// Typically the first call to `process` that can make no progress returns
    /// `Ok` instead of `MemNeeded`. Only the second consecutive call making no
    /// progress will return `MemNeeded`.
    MemNeeded,
}
158 | |
/// Possible error codes that can be returned from a processing operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The underlying data was corrupt.
    Data,

    /// Invalid or unsupported options were specified.
    Options,

    /// File format wasn't recognized.
    Format,

    /// Memory usage limit was reached.
    ///
    /// The memory limit can be increased with `set_memlimit`.
    MemLimit,

    /// Memory couldn't be allocated.
    Mem,

    /// A programming error was encountered.
    Program,

    /// The `TELL_NO_CHECK` flag was specified and no integrity check was
    /// available for this stream.
    NoCheck,

    /// The `TELL_UNSUPPORTED_CHECK` flag was specified and the integrity check
    /// used by this stream isn't implemented in this build of liblzma.
    UnsupportedCheck,
}
190 | |
191 | /// Possible integrity checks that can be part of a .xz stream. |
192 | #[allow (missing_docs)] // self explanatory mostly |
193 | #[derive (Copy, Clone)] |
194 | pub enum Check { |
195 | None = lzma_sys::LZMA_CHECK_NONE as isize, |
196 | Crc32 = lzma_sys::LZMA_CHECK_CRC32 as isize, |
197 | Crc64 = lzma_sys::LZMA_CHECK_CRC64 as isize, |
198 | Sha256 = lzma_sys::LZMA_CHECK_SHA256 as isize, |
199 | } |
200 | |
201 | /// Compression modes |
202 | /// |
203 | /// This selects the function used to analyze the data produced by the match |
204 | /// finder. |
205 | #[derive (Copy, Clone)] |
206 | pub enum Mode { |
207 | /// Fast compression. |
208 | /// |
209 | /// Fast mode is usually at its best when combined with a hash chain match |
210 | /// finder. |
211 | Fast = lzma_sys::LZMA_MODE_FAST as isize, |
212 | |
213 | /// Normal compression. |
214 | /// |
215 | /// This is usually notably slower than fast mode. Use this together with |
216 | /// binary tree match finders to expose the full potential of the LZMA1 or |
217 | /// LZMA2 encoder. |
218 | Normal = lzma_sys::LZMA_MODE_NORMAL as isize, |
219 | } |
220 | |
221 | /// Match finders |
222 | /// |
223 | /// Match finder has major effect on both speed and compression ratio. Usually |
224 | /// hash chains are faster than binary trees. |
225 | /// |
226 | /// If you will use `SyncFlush` often, the hash chains may be a better choice, |
227 | /// because binary trees get much higher compression ratio penalty with |
228 | /// `SyncFlush`. |
229 | /// |
230 | /// The memory usage formulas are only rough estimates, which are closest to |
231 | /// reality when dict_size is a power of two. The formulas are more complex in |
232 | /// reality, and can also change a little between liblzma versions. |
233 | #[derive (Copy, Clone)] |
234 | pub enum MatchFinder { |
235 | /// Hash Chain with 2- and 3-byte hashing |
236 | HashChain3 = lzma_sys::LZMA_MF_HC3 as isize, |
237 | /// Hash Chain with 2-, 3-, and 4-byte hashing |
238 | HashChain4 = lzma_sys::LZMA_MF_HC4 as isize, |
239 | |
240 | /// Binary Tree with 2-byte hashing |
241 | BinaryTree2 = lzma_sys::LZMA_MF_BT2 as isize, |
242 | /// Binary Tree with 2- and 3-byte hashing |
243 | BinaryTree3 = lzma_sys::LZMA_MF_BT3 as isize, |
244 | /// Binary Tree with 2-, 3-, and 4-byte hashing |
245 | BinaryTree4 = lzma_sys::LZMA_MF_BT4 as isize, |
246 | } |
247 | |
/// A flag passed when initializing a decoder, causes `process` to return
/// `Status::GetCheck` as soon as the integrity check is known.
///
/// Intended for the `flags` argument of `Stream::new_stream_decoder` and
/// `Stream::new_auto_decoder`.
pub const TELL_ANY_CHECK: u32 = lzma_sys::LZMA_TELL_ANY_CHECK;

/// A flag passed when initializing a decoder, causes `process` to return
/// `Error::NoCheck` if the stream being decoded has no integrity check.
///
/// Intended for the `flags` argument of `Stream::new_stream_decoder` and
/// `Stream::new_auto_decoder`.
pub const TELL_NO_CHECK: u32 = lzma_sys::LZMA_TELL_NO_CHECK;

/// A flag passed when initializing a decoder, causes `process` to return
/// `Error::UnsupportedCheck` if the stream being decoded has an integrity check
/// that cannot be verified by this build of liblzma.
///
/// Intended for the `flags` argument of `Stream::new_stream_decoder` and
/// `Stream::new_auto_decoder`.
pub const TELL_UNSUPPORTED_CHECK: u32 = lzma_sys::LZMA_TELL_UNSUPPORTED_CHECK;
260 | |
261 | /// A flag passed when initializing a decoder, causes the decoder to ignore any |
262 | /// integrity checks listed. |
263 | pub const IGNORE_CHECK: u32 = lzma_sys::LZMA_TELL_UNSUPPORTED_CHECK; |
264 | |
/// A flag passed when initializing a decoder, indicates that the stream may be
/// multiple concatenated xz files.
///
/// When this flag is used, `Action::Finish` must be passed to `process` to
/// signal the end of the input (see the documentation of `Action::Finish`).
pub const CONCATENATED: u32 = lzma_sys::LZMA_CONCATENATED;
268 | |
269 | impl Stream { |
270 | /// Initialize .xz stream encoder using a preset number |
271 | /// |
272 | /// This is intended to be used by most for encoding data. The `preset` |
273 | /// argument is a number 0-9 indicating the compression level to use, and |
274 | /// normally 6 is a reasonable default. |
275 | /// |
276 | /// The `check` argument is the integrity check to insert at the end of the |
277 | /// stream. The default of `Crc64` is typically appropriate. |
278 | pub fn new_easy_encoder(preset: u32, check: Check) -> Result<Stream, Error> { |
279 | unsafe { |
280 | let mut init = Stream { raw: mem::zeroed() }; |
281 | cvt(lzma_sys::lzma_easy_encoder( |
282 | &mut init.raw, |
283 | preset, |
284 | check as lzma_sys::lzma_check, |
285 | ))?; |
286 | Ok(init) |
287 | } |
288 | } |
289 | |
290 | /// Initialize .lzma encoder (legacy file format) |
291 | /// |
292 | /// The .lzma format is sometimes called the LZMA_Alone format, which is the |
293 | /// reason for the name of this function. The .lzma format supports only the |
294 | /// LZMA1 filter. There is no support for integrity checks like CRC32. |
295 | /// |
296 | /// Use this function if and only if you need to create files readable by |
297 | /// legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format |
298 | /// (the `new_easy_encoder` function) is strongly recommended. |
299 | /// |
300 | /// The valid action values for `process` are `Run` and `Finish`. No kind |
301 | /// of flushing is supported, because the file format doesn't make it |
302 | /// possible. |
303 | pub fn new_lzma_encoder(options: &LzmaOptions) -> Result<Stream, Error> { |
304 | unsafe { |
305 | let mut init = Stream { raw: mem::zeroed() }; |
306 | cvt(lzma_sys::lzma_alone_encoder(&mut init.raw, &options.raw))?; |
307 | Ok(init) |
308 | } |
309 | } |
310 | |
311 | /// Initialize .xz Stream encoder using a custom filter chain |
312 | /// |
313 | /// This function is similar to `new_easy_encoder` but a custom filter chain |
314 | /// is specified. |
315 | pub fn new_stream_encoder(filters: &Filters, check: Check) -> Result<Stream, Error> { |
316 | unsafe { |
317 | let mut init = Stream { raw: mem::zeroed() }; |
318 | cvt(lzma_sys::lzma_stream_encoder( |
319 | &mut init.raw, |
320 | filters.inner.as_ptr(), |
321 | check as lzma_sys::lzma_check, |
322 | ))?; |
323 | Ok(init) |
324 | } |
325 | } |
326 | |
327 | /// Initialize a .xz stream decoder. |
328 | /// |
329 | /// The maximum memory usage can be specified along with flags such as |
330 | /// `TELL_ANY_CHECK`, `TELL_NO_CHECK`, `TELL_UNSUPPORTED_CHECK`, |
331 | /// `TELL_IGNORE_CHECK`, or `CONCATENATED`. |
332 | pub fn new_stream_decoder(memlimit: u64, flags: u32) -> Result<Stream, Error> { |
333 | unsafe { |
334 | let mut init = Stream { raw: mem::zeroed() }; |
335 | cvt(lzma_sys::lzma_stream_decoder( |
336 | &mut init.raw, |
337 | memlimit, |
338 | flags, |
339 | ))?; |
340 | Ok(init) |
341 | } |
342 | } |
343 | |
344 | /// Initialize a .lzma stream decoder. |
345 | /// |
346 | /// The maximum memory usage can also be specified. |
347 | pub fn new_lzma_decoder(memlimit: u64) -> Result<Stream, Error> { |
348 | unsafe { |
349 | let mut init = Stream { raw: mem::zeroed() }; |
350 | cvt(lzma_sys::lzma_alone_decoder(&mut init.raw, memlimit))?; |
351 | Ok(init) |
352 | } |
353 | } |
354 | |
355 | /// Initialize a decoder which will choose a stream/lzma formats depending |
356 | /// on the input stream. |
357 | pub fn new_auto_decoder(memlimit: u64, flags: u32) -> Result<Stream, Error> { |
358 | unsafe { |
359 | let mut init = Stream { raw: mem::zeroed() }; |
360 | cvt(lzma_sys::lzma_auto_decoder(&mut init.raw, memlimit, flags))?; |
361 | Ok(init) |
362 | } |
363 | } |
364 | |
365 | /// Processes some data from input into an output buffer. |
366 | /// |
367 | /// This will perform the appropriate encoding or decoding operation |
368 | /// depending on the kind of underlying stream. Documentation for the |
369 | /// various `action` arguments can be found on the respective variants. |
370 | pub fn process( |
371 | &mut self, |
372 | input: &[u8], |
373 | output: &mut [u8], |
374 | action: Action, |
375 | ) -> Result<Status, Error> { |
376 | self.raw.next_in = input.as_ptr(); |
377 | self.raw.avail_in = input.len(); |
378 | self.raw.next_out = output.as_mut_ptr(); |
379 | self.raw.avail_out = output.len(); |
380 | let action = action as lzma_sys::lzma_action; |
381 | unsafe { cvt(lzma_sys::lzma_code(&mut self.raw, action)) } |
382 | } |
383 | |
384 | /// Performs the same data as `process`, but places output data in a `Vec`. |
385 | /// |
386 | /// This function will use the extra capacity of `output` as a destination |
387 | /// for bytes to be placed. The length of `output` will automatically get |
388 | /// updated after the operation has completed. |
389 | pub fn process_vec( |
390 | &mut self, |
391 | input: &[u8], |
392 | output: &mut Vec<u8>, |
393 | action: Action, |
394 | ) -> Result<Status, Error> { |
395 | let cap = output.capacity(); |
396 | let len = output.len(); |
397 | |
398 | unsafe { |
399 | let before = self.total_out(); |
400 | let ret = { |
401 | let ptr = output.as_mut_ptr().offset(len as isize); |
402 | let out = slice::from_raw_parts_mut(ptr, cap - len); |
403 | self.process(input, out, action) |
404 | }; |
405 | output.set_len((self.total_out() - before) as usize + len); |
406 | return ret; |
407 | } |
408 | } |
409 | |
410 | /// Returns the total amount of input bytes consumed by this stream. |
411 | pub fn total_in(&self) -> u64 { |
412 | self.raw.total_in |
413 | } |
414 | |
415 | /// Returns the total amount of bytes produced by this stream. |
416 | pub fn total_out(&self) -> u64 { |
417 | self.raw.total_out |
418 | } |
419 | |
420 | /// Get the current memory usage limit. |
421 | /// |
422 | /// This is only supported if the underlying stream supports a memlimit. |
423 | pub fn memlimit(&self) -> u64 { |
424 | unsafe { lzma_sys::lzma_memlimit_get(&self.raw) } |
425 | } |
426 | |
427 | /// Set the current memory usage limit. |
428 | /// |
429 | /// This can return `Error::MemLimit` if the new limit is too small or |
430 | /// `Error::Program` if this stream doesn't take a memory limit. |
431 | pub fn set_memlimit(&mut self, limit: u64) -> Result<(), Error> { |
432 | cvt(unsafe { lzma_sys::lzma_memlimit_set(&mut self.raw, limit) }).map(|_| ()) |
433 | } |
434 | } |
435 | |
436 | impl LzmaOptions { |
437 | /// Creates a new blank set of options for encoding. |
438 | /// |
439 | /// The `preset` argument is the compression level to use, typically in the |
440 | /// range of 0-9. |
441 | pub fn new_preset(preset: u32) -> Result<LzmaOptions, Error> { |
442 | unsafe { |
443 | let mut options = LzmaOptions { raw: mem::zeroed() }; |
444 | let ret = lzma_sys::lzma_lzma_preset(&mut options.raw, preset); |
445 | if ret != 0 { |
446 | Err(Error::Program) |
447 | } else { |
448 | Ok(options) |
449 | } |
450 | } |
451 | } |
452 | |
453 | /// Configures the dictionary size, in bytes |
454 | /// |
455 | /// Dictionary size indicates how many bytes of the recently processed |
456 | /// uncompressed data is kept in memory. |
457 | /// |
458 | /// The minimum dictionary size is 4096 bytes and the default is 2^23, 8MB. |
459 | pub fn dict_size(&mut self, size: u32) -> &mut LzmaOptions { |
460 | self.raw.dict_size = size; |
461 | self |
462 | } |
463 | |
464 | /// Configures the number of literal context bits. |
465 | /// |
466 | /// How many of the highest bits of the previous uncompressed eight-bit byte |
467 | /// (also known as `literal') are taken into account when predicting the |
468 | /// bits of the next literal. |
469 | /// |
470 | /// The maximum value to this is 4 and the default is 3. It is not currently |
471 | /// supported if this plus `literal_position_bits` is greater than 4. |
472 | pub fn literal_context_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
473 | self.raw.lc = bits; |
474 | self |
475 | } |
476 | |
477 | /// Configures the number of literal position bits. |
478 | /// |
479 | /// This affects what kind of alignment in the uncompressed data is assumed |
480 | /// when encoding literals. A literal is a single 8-bit byte. See |
481 | /// `position_bits` for more information about alignment. |
482 | /// |
483 | /// The default for this is 0. |
484 | pub fn literal_position_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
485 | self.raw.lp = bits; |
486 | self |
487 | } |
488 | |
489 | /// Configures the number of position bits. |
490 | /// |
491 | /// Position bits affects what kind of alignment in the uncompressed data is |
492 | /// assumed in general. The default of 2 means four-byte alignment (2^ pb |
493 | /// =2^2=4), which is often a good choice when there's no better guess. |
494 | /// |
495 | /// When the aligment is known, setting pb accordingly may reduce the file |
496 | /// size a little. E.g. with text files having one-byte alignment (US-ASCII, |
497 | /// ISO-8859-*, UTF-8), setting pb=0 can improve compression slightly. For |
498 | /// UTF-16 text, pb=1 is a good choice. If the alignment is an odd number |
499 | /// like 3 bytes, pb=0 might be the best choice. |
500 | /// |
501 | /// Even though the assumed alignment can be adjusted with pb and lp, LZMA1 |
502 | /// and LZMA2 still slightly favor 16-byte alignment. It might be worth |
503 | /// taking into account when designing file formats that are likely to be |
504 | /// often compressed with LZMA1 or LZMA2. |
505 | pub fn position_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
506 | self.raw.pb = bits; |
507 | self |
508 | } |
509 | |
510 | /// Configures the compression mode. |
511 | pub fn mode(&mut self, mode: Mode) -> &mut LzmaOptions { |
512 | self.raw.mode = mode as lzma_sys::lzma_mode; |
513 | self |
514 | } |
515 | |
516 | /// Configures the nice length of a match. |
517 | /// |
518 | /// This determines how many bytes the encoder compares from the match |
519 | /// candidates when looking for the best match. Once a match of at least |
520 | /// `nice_len` bytes long is found, the encoder stops looking for better |
521 | /// candidates and encodes the match. (Naturally, if the found match is |
522 | /// actually longer than `nice_len`, the actual length is encoded; it's not |
523 | /// truncated to `nice_len`.) |
524 | /// |
525 | /// Bigger values usually increase the compression ratio and compression |
526 | /// time. For most files, 32 to 128 is a good value, which gives very good |
527 | /// compression ratio at good speed. |
528 | /// |
529 | /// The exact minimum value depends on the match finder. The maximum is 273, |
530 | /// which is the maximum length of a match that LZMA1 and LZMA2 can encode. |
531 | pub fn nice_len(&mut self, len: u32) -> &mut LzmaOptions { |
532 | self.raw.nice_len = len; |
533 | self |
534 | } |
535 | |
536 | /// Configures the match finder ID. |
537 | pub fn match_finder(&mut self, mf: MatchFinder) -> &mut LzmaOptions { |
538 | self.raw.mf = mf as lzma_sys::lzma_match_finder; |
539 | self |
540 | } |
541 | |
542 | /// Maximum search depth in the match finder. |
543 | /// |
544 | /// For every input byte, match finder searches through the hash chain or |
545 | /// binary tree in a loop, each iteration going one step deeper in the chain |
546 | /// or tree. The searching stops if |
547 | /// |
548 | /// - a match of at least `nice_len` bytes long is found; |
549 | /// - all match candidates from the hash chain or binary tree have |
550 | /// been checked; or |
551 | /// - maximum search depth is reached. |
552 | /// |
553 | /// Maximum search depth is needed to prevent the match finder from wasting |
554 | /// too much time in case there are lots of short match candidates. On the |
555 | /// other hand, stopping the search before all candidates have been checked |
556 | /// can reduce compression ratio. |
557 | /// |
558 | /// Setting depth to zero tells liblzma to use an automatic default value, |
559 | /// that depends on the selected match finder and nice_len. The default is |
560 | /// in the range [4, 200] or so (it may vary between liblzma versions). |
561 | /// |
562 | /// Using a bigger depth value than the default can increase compression |
563 | /// ratio in some cases. There is no strict maximum value, but high values |
564 | /// (thousands or millions) should be used with care: the encoder could |
565 | /// remain fast enough with typical input, but malicious input could cause |
566 | /// the match finder to slow down dramatically, possibly creating a denial |
567 | /// of service attack. |
568 | pub fn depth(&mut self, depth: u32) -> &mut LzmaOptions { |
569 | self.raw.depth = depth; |
570 | self |
571 | } |
572 | } |
573 | |
574 | impl Check { |
575 | /// Test if this check is supported in this build of liblzma. |
576 | pub fn is_supported(&self) -> bool { |
577 | let ret: u8 = unsafe { lzma_sys::lzma_check_is_supported(*self as lzma_sys::lzma_check) }; |
578 | ret != 0 |
579 | } |
580 | } |
581 | |
582 | impl MatchFinder { |
583 | /// Test if this match finder is supported in this build of liblzma. |
584 | pub fn is_supported(&self) -> bool { |
585 | let ret: u8 = unsafe { lzma_sys::lzma_mf_is_supported(*self as lzma_sys::lzma_match_finder) }; |
586 | ret != 0 |
587 | } |
588 | } |
589 | |
590 | impl Filters { |
591 | /// Creates a new filter chain with no filters. |
592 | pub fn new() -> Filters { |
593 | Filters { |
594 | inner: vec![lzma_sys::lzma_filter { |
595 | id: lzma_sys::LZMA_VLI_UNKNOWN, |
596 | options: 0 as *mut _, |
597 | }], |
598 | lzma_opts: LinkedList::new(), |
599 | } |
600 | } |
601 | |
602 | /// Add an LZMA1 filter. |
603 | /// |
604 | /// LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, |
605 | /// 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from |
606 | /// accidentally using LZMA when they actually want LZMA2. |
607 | /// |
608 | /// LZMA1 shouldn't be used for new applications unless you _really_ know |
609 | /// what you are doing. LZMA2 is almost always a better choice. |
610 | pub fn lzma1(&mut self, opts: &LzmaOptions) -> &mut Filters { |
611 | self.lzma_opts.push_back(opts.raw); |
612 | let ptr = self.lzma_opts.back().unwrap() as *const _ as *mut _; |
613 | self.push(lzma_sys::lzma_filter { |
614 | id: lzma_sys::LZMA_FILTER_LZMA1, |
615 | options: ptr, |
616 | }) |
617 | } |
618 | |
619 | /// Add an LZMA2 filter. |
620 | /// |
621 | /// Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds |
622 | /// support for `SyncFlush`, uncompressed chunks (smaller expansion when |
623 | /// trying to compress uncompressible data), possibility to change |
624 | /// `literal_context_bits`/`literal_position_bits`/`position_bits` in the |
625 | /// middle of encoding, and some other internal improvements. |
626 | pub fn lzma2(&mut self, opts: &LzmaOptions) -> &mut Filters { |
627 | self.lzma_opts.push_back(opts.raw); |
628 | let ptr = self.lzma_opts.back().unwrap() as *const _ as *mut _; |
629 | self.push(lzma_sys::lzma_filter { |
630 | id: lzma_sys::LZMA_FILTER_LZMA2, |
631 | options: ptr, |
632 | }) |
633 | } |
634 | |
635 | // TODO: delta filter |
636 | |
637 | /// Add a filter for x86 binaries. |
638 | pub fn x86(&mut self) -> &mut Filters { |
639 | self.push(lzma_sys::lzma_filter { |
640 | id: lzma_sys::LZMA_FILTER_X86, |
641 | options: 0 as *mut _, |
642 | }) |
643 | } |
644 | |
645 | /// Add a filter for PowerPC binaries. |
646 | pub fn powerpc(&mut self) -> &mut Filters { |
647 | self.push(lzma_sys::lzma_filter { |
648 | id: lzma_sys::LZMA_FILTER_POWERPC, |
649 | options: 0 as *mut _, |
650 | }) |
651 | } |
652 | |
653 | /// Add a filter for IA-64 (itanium) binaries. |
654 | pub fn ia64(&mut self) -> &mut Filters { |
655 | self.push(lzma_sys::lzma_filter { |
656 | id: lzma_sys::LZMA_FILTER_IA64, |
657 | options: 0 as *mut _, |
658 | }) |
659 | } |
660 | |
661 | /// Add a filter for ARM binaries. |
662 | pub fn arm(&mut self) -> &mut Filters { |
663 | self.push(lzma_sys::lzma_filter { |
664 | id: lzma_sys::LZMA_FILTER_ARM, |
665 | options: 0 as *mut _, |
666 | }) |
667 | } |
668 | |
669 | /// Add a filter for ARM-Thumb binaries. |
670 | pub fn arm_thumb(&mut self) -> &mut Filters { |
671 | self.push(lzma_sys::lzma_filter { |
672 | id: lzma_sys::LZMA_FILTER_ARMTHUMB, |
673 | options: 0 as *mut _, |
674 | }) |
675 | } |
676 | |
677 | /// Add a filter for SPARC binaries. |
678 | pub fn sparc(&mut self) -> &mut Filters { |
679 | self.push(lzma_sys::lzma_filter { |
680 | id: lzma_sys::LZMA_FILTER_SPARC, |
681 | options: 0 as *mut _, |
682 | }) |
683 | } |
684 | |
685 | fn push(&mut self, filter: lzma_sys::lzma_filter) -> &mut Filters { |
686 | let pos = self.inner.len() - 1; |
687 | self.inner.insert(pos, filter); |
688 | self |
689 | } |
690 | } |
691 | |
692 | impl MtStreamBuilder { |
693 | /// Creates a new blank builder to create a multithreaded encoding `Stream`. |
694 | pub fn new() -> MtStreamBuilder { |
695 | unsafe { |
696 | let mut init = MtStreamBuilder { |
697 | raw: mem::zeroed(), |
698 | filters: None, |
699 | }; |
700 | init.raw.threads = 1; |
701 | return init; |
702 | } |
703 | } |
704 | |
705 | /// Configures the number of worker threads to use |
706 | pub fn threads(&mut self, threads: u32) -> &mut Self { |
707 | self.raw.threads = threads; |
708 | self |
709 | } |
710 | |
711 | /// Configures the maximum uncompressed size of a block |
712 | /// |
713 | /// The encoder will start a new .xz block every `block_size` bytes. |
714 | /// Using `FullFlush` or `FullBarrier` with `process` the caller may tell |
715 | /// liblzma to start a new block earlier. |
716 | /// |
717 | /// With LZMA2, a recommended block size is 2-4 times the LZMA2 dictionary |
718 | /// size. With very small dictionaries, it is recommended to use at least 1 |
719 | /// MiB block size for good compression ratio, even if this is more than |
720 | /// four times the dictionary size. Note that these are only recommendations |
721 | /// for typical use cases; feel free to use other values. Just keep in mind |
722 | /// that using a block size less than the LZMA2 dictionary size is waste of |
723 | /// RAM. |
724 | /// |
725 | /// Set this to 0 to let liblzma choose the block size depending on the |
726 | /// compression options. For LZMA2 it will be 3*`dict_size` or 1 MiB, |
727 | /// whichever is more. |
728 | /// |
729 | /// For each thread, about 3 * `block_size` bytes of memory will be |
730 | /// allocated. This may change in later liblzma versions. If so, the memory |
731 | /// usage will probably be reduced, not increased. |
732 | pub fn block_size(&mut self, block_size: u64) -> &mut Self { |
733 | self.raw.block_size = block_size; |
734 | self |
735 | } |
736 | |
737 | /// Timeout to allow `process` to return early |
738 | /// |
739 | /// Multithreading can make liblzma to consume input and produce output in a |
740 | /// very bursty way: it may first read a lot of input to fill internal |
741 | /// buffers, then no input or output occurs for a while. |
742 | /// |
743 | /// In single-threaded mode, `process` won't return until it has either |
744 | /// consumed all the input or filled the output buffer. If this is done in |
745 | /// multithreaded mode, it may cause a call `process` to take even tens of |
746 | /// seconds, which isn't acceptable in all applications. |
747 | /// |
748 | /// To avoid very long blocking times in `process`, a timeout (in |
749 | /// milliseconds) may be set here. If `process would block longer than |
750 | /// this number of milliseconds, it will return with `Ok`. Reasonable |
751 | /// values are 100 ms or more. The xz command line tool uses 300 ms. |
752 | /// |
753 | /// If long blocking times are fine for you, set timeout to a special |
754 | /// value of 0, which will disable the timeout mechanism and will make |
755 | /// `process` block until all the input is consumed or the output |
756 | /// buffer has been filled. |
757 | pub fn timeout_ms(&mut self, timeout: u32) -> &mut Self { |
758 | self.raw.timeout = timeout; |
759 | self |
760 | } |
761 | |
762 | /// Compression preset (level and possible flags) |
763 | /// |
764 | /// The preset is set just like with `Stream::new_easy_encoder`. The preset |
765 | /// is ignored if filters below have been specified. |
766 | pub fn preset(&mut self, preset: u32) -> &mut Self { |
767 | self.raw.preset = preset; |
768 | self |
769 | } |
770 | |
771 | /// Configure a custom filter chain |
772 | pub fn filters(&mut self, filters: Filters) -> &mut Self { |
773 | self.raw.filters = filters.inner.as_ptr(); |
774 | self.filters = Some(filters); |
775 | self |
776 | } |
777 | |
778 | /// Configures the integrity check type |
779 | pub fn check(&mut self, check: Check) -> &mut Self { |
780 | self.raw.check = check as lzma_sys::lzma_check; |
781 | self |
782 | } |
783 | |
784 | /// Calculate approximate memory usage of multithreaded .xz encoder |
785 | pub fn memusage(&self) -> u64 { |
786 | unsafe { lzma_sys::lzma_stream_encoder_mt_memusage(&self.raw) } |
787 | } |
788 | |
789 | /// Initialize multithreaded .xz stream encoder. |
790 | pub fn encoder(&self) -> Result<Stream, Error> { |
791 | unsafe { |
792 | let mut init = Stream { raw: mem::zeroed() }; |
793 | cvt(lzma_sys::lzma_stream_encoder_mt(&mut init.raw, &self.raw))?; |
794 | Ok(init) |
795 | } |
796 | } |
797 | } |
798 | |
799 | fn cvt(rc: lzma_sys::lzma_ret) -> Result<Status, Error> { |
800 | match rc { |
801 | lzma_sys::LZMA_OK => Ok(Status::Ok), |
802 | lzma_sys::LZMA_STREAM_END => Ok(Status::StreamEnd), |
803 | lzma_sys::LZMA_NO_CHECK => Err(Error::NoCheck), |
804 | lzma_sys::LZMA_UNSUPPORTED_CHECK => Err(Error::UnsupportedCheck), |
805 | lzma_sys::LZMA_GET_CHECK => Ok(Status::GetCheck), |
806 | lzma_sys::LZMA_MEM_ERROR => Err(Error::Mem), |
807 | lzma_sys::LZMA_MEMLIMIT_ERROR => Err(Error::MemLimit), |
808 | lzma_sys::LZMA_FORMAT_ERROR => Err(Error::Format), |
809 | lzma_sys::LZMA_OPTIONS_ERROR => Err(Error::Options), |
810 | lzma_sys::LZMA_DATA_ERROR => Err(Error::Data), |
811 | lzma_sys::LZMA_BUF_ERROR => Ok(Status::MemNeeded), |
812 | lzma_sys::LZMA_PROG_ERROR => Err(Error::Program), |
813 | c: u32 => panic!("unknown return code: {}" , c), |
814 | } |
815 | } |
816 | |
817 | impl From<Error> for io::Error { |
818 | fn from(e: Error) -> io::Error { |
819 | let kind: ErrorKind = match e { |
820 | Error::Data => std::io::ErrorKind::InvalidData, |
821 | Error::Options => std::io::ErrorKind::InvalidInput, |
822 | Error::Format => std::io::ErrorKind::InvalidData, |
823 | Error::MemLimit => std::io::ErrorKind::Other, |
824 | Error::Mem => std::io::ErrorKind::Other, |
825 | Error::Program => std::io::ErrorKind::Other, |
826 | Error::NoCheck => std::io::ErrorKind::InvalidInput, |
827 | Error::UnsupportedCheck => std::io::ErrorKind::Other, |
828 | }; |
829 | |
830 | io::Error::new(kind, error:e) |
831 | } |
832 | } |
833 | |
// Every method of `std::error::Error` keeps its default implementation; the
// message comes from the `Display` impl of `Error`.
impl error::Error for Error {}
835 | |
836 | impl fmt::Display for Error { |
837 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
838 | match&str self { |
839 | Error::Data => "lzma data error" , |
840 | Error::Options => "invalid options" , |
841 | Error::Format => "stream/file format not recognized" , |
842 | Error::MemLimit => "memory limit reached" , |
843 | Error::Mem => "can't allocate memory" , |
844 | Error::Program => "liblzma internal error" , |
845 | Error::NoCheck => "no integrity check was available" , |
846 | Error::UnsupportedCheck => "liblzma not built with check support" , |
847 | } |
848 | .fmt(f) |
849 | } |
850 | } |
851 | |
852 | impl Drop for Stream { |
853 | fn drop(&mut self) { |
854 | unsafe { |
855 | lzma_sys::lzma_end(&mut self.raw); |
856 | } |
857 | } |
858 | } |
859 | |