//! Raw in-memory LZMA streams.
//!
//! The `Stream` type exported by this module is the primary type for encoding
//! and decoding LZMA streams. Each `Stream` is either an encoder or a decoder
//! and processes data in a streaming fashion.
| 6 | |
| 7 | use std::collections::LinkedList; |
| 8 | use std::error; |
| 9 | use std::fmt; |
| 10 | use std::io; |
| 11 | use std::mem; |
| 12 | use std::slice; |
| 13 | |
| 14 | use lzma_sys; |
| 15 | |
| 16 | /// Representation of an in-memory LZMA encoding or decoding stream. |
| 17 | /// |
| 18 | /// Wraps the raw underlying `lzma_stream` type and provides the ability to |
| 19 | /// create streams which can either decode or encode various LZMA-based formats. |
| 20 | pub struct Stream { |
| 21 | raw: lzma_sys::lzma_stream, |
| 22 | } |
| 23 | |
| 24 | unsafe impl Send for Stream {} |
| 25 | unsafe impl Sync for Stream {} |
| 26 | |
| 27 | /// Options that can be used to configure how LZMA encoding happens. |
| 28 | /// |
| 29 | /// This builder is consumed by a number of other methods. |
| 30 | pub struct LzmaOptions { |
| 31 | raw: lzma_sys::lzma_options_lzma, |
| 32 | } |
| 33 | |
| 34 | /// Builder to create a multi-threaded stream encoder. |
| 35 | pub struct MtStreamBuilder { |
| 36 | raw: lzma_sys::lzma_mt, |
| 37 | filters: Option<Filters>, |
| 38 | } |
| 39 | |
| 40 | /// A custom chain of filters to configure an encoding stream. |
| 41 | pub struct Filters { |
| 42 | inner: Vec<lzma_sys::lzma_filter>, |
| 43 | lzma_opts: LinkedList<lzma_sys::lzma_options_lzma>, |
| 44 | } |
| 45 | |
| 46 | /// The `action` argument for `process`, |
| 47 | /// |
| 48 | /// After the first use of SyncFlush, FullFlush, FullBarrier, or Finish, the |
| 49 | /// same `action' must is used until `process` returns `Status::StreamEnd`. |
| 50 | /// Also, the amount of input must not be modified by the application until |
| 51 | /// `process` returns `Status::StreamEnd`. Changing the `action' or modifying |
| 52 | /// the amount of input will make `process` return `Error::Program`. |
| 53 | #[derive (Copy, Clone)] |
| 54 | pub enum Action { |
| 55 | /// Continue processing |
| 56 | /// |
| 57 | /// When encoding, encode as much input as possible. Some internal buffering |
| 58 | /// will probably be done (depends on the filter chain in use), which causes |
| 59 | /// latency: the input used won't usually be decodeable from the output of |
| 60 | /// the same `process` call. |
| 61 | /// |
| 62 | /// When decoding, decode as much input as possible and produce as much |
| 63 | /// output as possible. |
| 64 | Run = lzma_sys::LZMA_RUN as isize, |
| 65 | |
| 66 | /// Make all the input available at output |
| 67 | /// |
| 68 | /// Normally the encoder introduces some latency. `SyncFlush` forces all the |
| 69 | /// buffered data to be available at output without resetting the internal |
| 70 | /// state of the encoder. This way it is possible to use compressed stream |
| 71 | /// for example for communication over network. |
| 72 | /// |
| 73 | /// Only some filters support `SyncFlush`. Trying to use `SyncFlush` with |
| 74 | /// filters that don't support it will make `process` return |
| 75 | /// `Error::Options`. For example, LZMA1 doesn't support `SyncFlush` but |
| 76 | /// LZMA2 does. |
| 77 | /// |
| 78 | /// Using `SyncFlush` very often can dramatically reduce the compression |
| 79 | /// ratio. With some filters (for example, LZMA2), fine-tuning the |
| 80 | /// compression options may help mitigate this problem significantly (for |
| 81 | /// example, match finder with LZMA2). |
| 82 | /// |
| 83 | /// Decoders don't support `SyncFlush`. |
| 84 | SyncFlush = lzma_sys::LZMA_SYNC_FLUSH as isize, |
| 85 | |
| 86 | /// Finish encoding of the current block. |
| 87 | /// |
| 88 | /// All the input data going to the current block must have been given to |
| 89 | /// the encoder. Call `process` with `FullFlush` until it returns |
| 90 | /// `Status::StreamEnd`. Then continue normally with `Run` or finish the |
| 91 | /// Stream with `Finish`. |
| 92 | /// |
| 93 | /// This action is currently supported only by stream encoder and easy |
| 94 | /// encoder (which uses stream encoder). If there is no unfinished block, no |
| 95 | /// empty block is created. |
| 96 | FullFlush = lzma_sys::LZMA_FULL_FLUSH as isize, |
| 97 | |
| 98 | /// Finish encoding of the current block. |
| 99 | /// |
| 100 | /// This is like `FullFlush` except that this doesn't necessarily wait until |
| 101 | /// all the input has been made available via the output buffer. That is, |
| 102 | /// `process` might return `Status::StreamEnd` as soon as all the input has |
| 103 | /// been consumed. |
| 104 | /// |
| 105 | /// `FullBarrier` is useful with a threaded encoder if one wants to split |
| 106 | /// the .xz Stream into blocks at specific offsets but doesn't care if the |
| 107 | /// output isn't flushed immediately. Using `FullBarrier` allows keeping the |
| 108 | /// threads busy while `FullFlush` would make `process` wait until all the |
| 109 | /// threads have finished until more data could be passed to the encoder. |
| 110 | /// |
| 111 | /// With a `Stream` initialized with the single-threaded |
| 112 | /// `new_stream_encoder` or `new_easy_encoder`, `FullBarrier` is an alias |
| 113 | /// for `FullFlush`. |
| 114 | FullBarrier = lzma_sys::LZMA_FULL_BARRIER as isize, |
| 115 | |
| 116 | /// Finish the current operation |
| 117 | /// |
| 118 | /// All the input data must have been given to the encoder (the last bytes |
| 119 | /// can still be pending in next_in). Call `process` with `Finish` until it |
| 120 | /// returns `Status::StreamEnd`. Once `Finish` has been used, the amount of |
| 121 | /// input must no longer be changed by the application. |
| 122 | /// |
| 123 | /// When decoding, using `Finish` is optional unless the concatenated flag |
| 124 | /// was used when the decoder was initialized. When concatenated was not |
| 125 | /// used, the only effect of `Finish` is that the amount of input must not |
| 126 | /// be changed just like in the encoder. |
| 127 | Finish = lzma_sys::LZMA_FINISH as isize, |
| 128 | } |
| 129 | |
/// The successful outcomes of a `process` operation.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Status {
    /// The operation completed successfully.
    Ok,

    /// The end of the stream was reached.
    ///
    /// When encoding, this means that a sync/full flush or `Finish` was
    /// completed. When decoding, it indicates that all data was decoded
    /// successfully.
    StreamEnd,

    /// The integrity check of the stream is now known.
    ///
    /// If the `TELL_ANY_CHECK` flag is specified when constructing a decoder,
    /// this informs that the `check` method will now return the underlying
    /// integrity check algorithm.
    GetCheck,

    /// No error has been encountered, but no progress is possible.
    ///
    /// Processing can be continued normally by providing more input and/or
    /// more output space, if possible.
    ///
    /// Typically the first call to `process` that can make no progress
    /// returns `Ok` instead of `MemNeeded`. Only the second consecutive call
    /// making no progress will return `MemNeeded`.
    MemNeeded,
}
| 158 | |
/// The error codes that a processing operation can return.
#[derive(Debug, Clone, PartialEq)]
pub enum Error {
    /// The underlying data was corrupt.
    Data,

    /// Invalid or unsupported options were specified.
    Options,

    /// The file format wasn't recognized.
    Format,

    /// The memory usage limit was reached.
    ///
    /// The memory limit can be increased with `set_memlimit`.
    MemLimit,

    /// Memory couldn't be allocated.
    Mem,

    /// A programming error was encountered.
    Program,

    /// The `TELL_NO_CHECK` flag was specified and no integrity check was
    /// available for this stream.
    NoCheck,

    /// The `TELL_UNSUPPORTED_CHECK` flag was specified and the integrity
    /// check of this stream isn't implemented in this build of liblzma.
    UnsupportedCheck,
}
| 190 | |
| 191 | /// Possible integrity checks that can be part of a .xz stream. |
| 192 | #[allow (missing_docs)] // self explanatory mostly |
| 193 | #[derive (Copy, Clone)] |
| 194 | pub enum Check { |
| 195 | None = lzma_sys::LZMA_CHECK_NONE as isize, |
| 196 | Crc32 = lzma_sys::LZMA_CHECK_CRC32 as isize, |
| 197 | Crc64 = lzma_sys::LZMA_CHECK_CRC64 as isize, |
| 198 | Sha256 = lzma_sys::LZMA_CHECK_SHA256 as isize, |
| 199 | } |
| 200 | |
| 201 | /// Compression modes |
| 202 | /// |
| 203 | /// This selects the function used to analyze the data produced by the match |
| 204 | /// finder. |
| 205 | #[derive (Copy, Clone)] |
| 206 | pub enum Mode { |
| 207 | /// Fast compression. |
| 208 | /// |
| 209 | /// Fast mode is usually at its best when combined with a hash chain match |
| 210 | /// finder. |
| 211 | Fast = lzma_sys::LZMA_MODE_FAST as isize, |
| 212 | |
| 213 | /// Normal compression. |
| 214 | /// |
| 215 | /// This is usually notably slower than fast mode. Use this together with |
| 216 | /// binary tree match finders to expose the full potential of the LZMA1 or |
| 217 | /// LZMA2 encoder. |
| 218 | Normal = lzma_sys::LZMA_MODE_NORMAL as isize, |
| 219 | } |
| 220 | |
| 221 | /// Match finders |
| 222 | /// |
| 223 | /// Match finder has major effect on both speed and compression ratio. Usually |
| 224 | /// hash chains are faster than binary trees. |
| 225 | /// |
| 226 | /// If you will use `SyncFlush` often, the hash chains may be a better choice, |
| 227 | /// because binary trees get much higher compression ratio penalty with |
| 228 | /// `SyncFlush`. |
| 229 | /// |
| 230 | /// The memory usage formulas are only rough estimates, which are closest to |
| 231 | /// reality when dict_size is a power of two. The formulas are more complex in |
| 232 | /// reality, and can also change a little between liblzma versions. |
| 233 | #[derive (Copy, Clone)] |
| 234 | pub enum MatchFinder { |
| 235 | /// Hash Chain with 2- and 3-byte hashing |
| 236 | HashChain3 = lzma_sys::LZMA_MF_HC3 as isize, |
| 237 | /// Hash Chain with 2-, 3-, and 4-byte hashing |
| 238 | HashChain4 = lzma_sys::LZMA_MF_HC4 as isize, |
| 239 | |
| 240 | /// Binary Tree with 2-byte hashing |
| 241 | BinaryTree2 = lzma_sys::LZMA_MF_BT2 as isize, |
| 242 | /// Binary Tree with 2- and 3-byte hashing |
| 243 | BinaryTree3 = lzma_sys::LZMA_MF_BT3 as isize, |
| 244 | /// Binary Tree with 2-, 3-, and 4-byte hashing |
| 245 | BinaryTree4 = lzma_sys::LZMA_MF_BT4 as isize, |
| 246 | } |
| 247 | |
| 248 | /// A flag passed when initializing a decoder, causes `process` to return |
| 249 | /// `Status::GetCheck` as soon as the integrity check is known. |
| 250 | pub const TELL_ANY_CHECK: u32 = lzma_sys::LZMA_TELL_ANY_CHECK; |
| 251 | |
| 252 | /// A flag passed when initializing a decoder, causes `process` to return |
| 253 | /// `Error::NoCheck` if the stream being decoded has no integrity check. |
| 254 | pub const TELL_NO_CHECK: u32 = lzma_sys::LZMA_TELL_NO_CHECK; |
| 255 | |
| 256 | /// A flag passed when initializing a decoder, causes `process` to return |
| 257 | /// `Error::UnsupportedCheck` if the stream being decoded has an integrity check |
| 258 | /// that cannot be verified by this build of liblzma. |
| 259 | pub const TELL_UNSUPPORTED_CHECK: u32 = lzma_sys::LZMA_TELL_UNSUPPORTED_CHECK; |
| 260 | |
| 261 | /// A flag passed when initializing a decoder, causes the decoder to ignore any |
| 262 | /// integrity checks listed. |
| 263 | pub const IGNORE_CHECK: u32 = lzma_sys::LZMA_TELL_UNSUPPORTED_CHECK; |
| 264 | |
| 265 | /// A flag passed when initializing a decoder, indicates that the stream may be |
| 266 | /// multiple concatenated xz files. |
| 267 | pub const CONCATENATED: u32 = lzma_sys::LZMA_CONCATENATED; |
| 268 | |
| 269 | impl Stream { |
| 270 | /// Initialize .xz stream encoder using a preset number |
| 271 | /// |
| 272 | /// This is intended to be used by most for encoding data. The `preset` |
| 273 | /// argument is a number 0-9 indicating the compression level to use, and |
| 274 | /// normally 6 is a reasonable default. |
| 275 | /// |
| 276 | /// The `check` argument is the integrity check to insert at the end of the |
| 277 | /// stream. The default of `Crc64` is typically appropriate. |
| 278 | pub fn new_easy_encoder(preset: u32, check: Check) -> Result<Stream, Error> { |
| 279 | unsafe { |
| 280 | let mut init = Stream { raw: mem::zeroed() }; |
| 281 | cvt(lzma_sys::lzma_easy_encoder( |
| 282 | &mut init.raw, |
| 283 | preset, |
| 284 | check as lzma_sys::lzma_check, |
| 285 | ))?; |
| 286 | Ok(init) |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | /// Initialize .lzma encoder (legacy file format) |
| 291 | /// |
| 292 | /// The .lzma format is sometimes called the LZMA_Alone format, which is the |
| 293 | /// reason for the name of this function. The .lzma format supports only the |
| 294 | /// LZMA1 filter. There is no support for integrity checks like CRC32. |
| 295 | /// |
| 296 | /// Use this function if and only if you need to create files readable by |
| 297 | /// legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format |
| 298 | /// (the `new_easy_encoder` function) is strongly recommended. |
| 299 | /// |
| 300 | /// The valid action values for `process` are `Run` and `Finish`. No kind |
| 301 | /// of flushing is supported, because the file format doesn't make it |
| 302 | /// possible. |
| 303 | pub fn new_lzma_encoder(options: &LzmaOptions) -> Result<Stream, Error> { |
| 304 | unsafe { |
| 305 | let mut init = Stream { raw: mem::zeroed() }; |
| 306 | cvt(lzma_sys::lzma_alone_encoder(&mut init.raw, &options.raw))?; |
| 307 | Ok(init) |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | /// Initialize .xz Stream encoder using a custom filter chain |
| 312 | /// |
| 313 | /// This function is similar to `new_easy_encoder` but a custom filter chain |
| 314 | /// is specified. |
| 315 | pub fn new_stream_encoder(filters: &Filters, check: Check) -> Result<Stream, Error> { |
| 316 | unsafe { |
| 317 | let mut init = Stream { raw: mem::zeroed() }; |
| 318 | cvt(lzma_sys::lzma_stream_encoder( |
| 319 | &mut init.raw, |
| 320 | filters.inner.as_ptr(), |
| 321 | check as lzma_sys::lzma_check, |
| 322 | ))?; |
| 323 | Ok(init) |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | /// Initialize a .xz stream decoder. |
| 328 | /// |
| 329 | /// The maximum memory usage can be specified along with flags such as |
| 330 | /// `TELL_ANY_CHECK`, `TELL_NO_CHECK`, `TELL_UNSUPPORTED_CHECK`, |
| 331 | /// `TELL_IGNORE_CHECK`, or `CONCATENATED`. |
| 332 | pub fn new_stream_decoder(memlimit: u64, flags: u32) -> Result<Stream, Error> { |
| 333 | unsafe { |
| 334 | let mut init = Stream { raw: mem::zeroed() }; |
| 335 | cvt(lzma_sys::lzma_stream_decoder( |
| 336 | &mut init.raw, |
| 337 | memlimit, |
| 338 | flags, |
| 339 | ))?; |
| 340 | Ok(init) |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | /// Initialize a .lzma stream decoder. |
| 345 | /// |
| 346 | /// The maximum memory usage can also be specified. |
| 347 | pub fn new_lzma_decoder(memlimit: u64) -> Result<Stream, Error> { |
| 348 | unsafe { |
| 349 | let mut init = Stream { raw: mem::zeroed() }; |
| 350 | cvt(lzma_sys::lzma_alone_decoder(&mut init.raw, memlimit))?; |
| 351 | Ok(init) |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | /// Initialize a decoder which will choose a stream/lzma formats depending |
| 356 | /// on the input stream. |
| 357 | pub fn new_auto_decoder(memlimit: u64, flags: u32) -> Result<Stream, Error> { |
| 358 | unsafe { |
| 359 | let mut init = Stream { raw: mem::zeroed() }; |
| 360 | cvt(lzma_sys::lzma_auto_decoder(&mut init.raw, memlimit, flags))?; |
| 361 | Ok(init) |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | /// Processes some data from input into an output buffer. |
| 366 | /// |
| 367 | /// This will perform the appropriate encoding or decoding operation |
| 368 | /// depending on the kind of underlying stream. Documentation for the |
| 369 | /// various `action` arguments can be found on the respective variants. |
| 370 | pub fn process( |
| 371 | &mut self, |
| 372 | input: &[u8], |
| 373 | output: &mut [u8], |
| 374 | action: Action, |
| 375 | ) -> Result<Status, Error> { |
| 376 | self.raw.next_in = input.as_ptr(); |
| 377 | self.raw.avail_in = input.len(); |
| 378 | self.raw.next_out = output.as_mut_ptr(); |
| 379 | self.raw.avail_out = output.len(); |
| 380 | let action = action as lzma_sys::lzma_action; |
| 381 | unsafe { cvt(lzma_sys::lzma_code(&mut self.raw, action)) } |
| 382 | } |
| 383 | |
| 384 | /// Performs the same data as `process`, but places output data in a `Vec`. |
| 385 | /// |
| 386 | /// This function will use the extra capacity of `output` as a destination |
| 387 | /// for bytes to be placed. The length of `output` will automatically get |
| 388 | /// updated after the operation has completed. |
| 389 | pub fn process_vec( |
| 390 | &mut self, |
| 391 | input: &[u8], |
| 392 | output: &mut Vec<u8>, |
| 393 | action: Action, |
| 394 | ) -> Result<Status, Error> { |
| 395 | let cap = output.capacity(); |
| 396 | let len = output.len(); |
| 397 | |
| 398 | unsafe { |
| 399 | let before = self.total_out(); |
| 400 | let ret = { |
| 401 | let ptr = output.as_mut_ptr().offset(len as isize); |
| 402 | let out = slice::from_raw_parts_mut(ptr, cap - len); |
| 403 | self.process(input, out, action) |
| 404 | }; |
| 405 | output.set_len((self.total_out() - before) as usize + len); |
| 406 | return ret; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | /// Returns the total amount of input bytes consumed by this stream. |
| 411 | pub fn total_in(&self) -> u64 { |
| 412 | self.raw.total_in |
| 413 | } |
| 414 | |
| 415 | /// Returns the total amount of bytes produced by this stream. |
| 416 | pub fn total_out(&self) -> u64 { |
| 417 | self.raw.total_out |
| 418 | } |
| 419 | |
| 420 | /// Get the current memory usage limit. |
| 421 | /// |
| 422 | /// This is only supported if the underlying stream supports a memlimit. |
| 423 | pub fn memlimit(&self) -> u64 { |
| 424 | unsafe { lzma_sys::lzma_memlimit_get(&self.raw) } |
| 425 | } |
| 426 | |
| 427 | /// Set the current memory usage limit. |
| 428 | /// |
| 429 | /// This can return `Error::MemLimit` if the new limit is too small or |
| 430 | /// `Error::Program` if this stream doesn't take a memory limit. |
| 431 | pub fn set_memlimit(&mut self, limit: u64) -> Result<(), Error> { |
| 432 | cvt(unsafe { lzma_sys::lzma_memlimit_set(&mut self.raw, limit) }).map(|_| ()) |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | impl LzmaOptions { |
| 437 | /// Creates a new blank set of options for encoding. |
| 438 | /// |
| 439 | /// The `preset` argument is the compression level to use, typically in the |
| 440 | /// range of 0-9. |
| 441 | pub fn new_preset(preset: u32) -> Result<LzmaOptions, Error> { |
| 442 | unsafe { |
| 443 | let mut options = LzmaOptions { raw: mem::zeroed() }; |
| 444 | let ret = lzma_sys::lzma_lzma_preset(&mut options.raw, preset); |
| 445 | if ret != 0 { |
| 446 | Err(Error::Program) |
| 447 | } else { |
| 448 | Ok(options) |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | /// Configures the dictionary size, in bytes |
| 454 | /// |
| 455 | /// Dictionary size indicates how many bytes of the recently processed |
| 456 | /// uncompressed data is kept in memory. |
| 457 | /// |
| 458 | /// The minimum dictionary size is 4096 bytes and the default is 2^23, 8MB. |
| 459 | pub fn dict_size(&mut self, size: u32) -> &mut LzmaOptions { |
| 460 | self.raw.dict_size = size; |
| 461 | self |
| 462 | } |
| 463 | |
| 464 | /// Configures the number of literal context bits. |
| 465 | /// |
| 466 | /// How many of the highest bits of the previous uncompressed eight-bit byte |
| 467 | /// (also known as `literal') are taken into account when predicting the |
| 468 | /// bits of the next literal. |
| 469 | /// |
| 470 | /// The maximum value to this is 4 and the default is 3. It is not currently |
| 471 | /// supported if this plus `literal_position_bits` is greater than 4. |
| 472 | pub fn literal_context_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
| 473 | self.raw.lc = bits; |
| 474 | self |
| 475 | } |
| 476 | |
| 477 | /// Configures the number of literal position bits. |
| 478 | /// |
| 479 | /// This affects what kind of alignment in the uncompressed data is assumed |
| 480 | /// when encoding literals. A literal is a single 8-bit byte. See |
| 481 | /// `position_bits` for more information about alignment. |
| 482 | /// |
| 483 | /// The default for this is 0. |
| 484 | pub fn literal_position_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
| 485 | self.raw.lp = bits; |
| 486 | self |
| 487 | } |
| 488 | |
| 489 | /// Configures the number of position bits. |
| 490 | /// |
| 491 | /// Position bits affects what kind of alignment in the uncompressed data is |
| 492 | /// assumed in general. The default of 2 means four-byte alignment (2^ pb |
| 493 | /// =2^2=4), which is often a good choice when there's no better guess. |
| 494 | /// |
| 495 | /// When the aligment is known, setting pb accordingly may reduce the file |
| 496 | /// size a little. E.g. with text files having one-byte alignment (US-ASCII, |
| 497 | /// ISO-8859-*, UTF-8), setting pb=0 can improve compression slightly. For |
| 498 | /// UTF-16 text, pb=1 is a good choice. If the alignment is an odd number |
| 499 | /// like 3 bytes, pb=0 might be the best choice. |
| 500 | /// |
| 501 | /// Even though the assumed alignment can be adjusted with pb and lp, LZMA1 |
| 502 | /// and LZMA2 still slightly favor 16-byte alignment. It might be worth |
| 503 | /// taking into account when designing file formats that are likely to be |
| 504 | /// often compressed with LZMA1 or LZMA2. |
| 505 | pub fn position_bits(&mut self, bits: u32) -> &mut LzmaOptions { |
| 506 | self.raw.pb = bits; |
| 507 | self |
| 508 | } |
| 509 | |
| 510 | /// Configures the compression mode. |
| 511 | pub fn mode(&mut self, mode: Mode) -> &mut LzmaOptions { |
| 512 | self.raw.mode = mode as lzma_sys::lzma_mode; |
| 513 | self |
| 514 | } |
| 515 | |
| 516 | /// Configures the nice length of a match. |
| 517 | /// |
| 518 | /// This determines how many bytes the encoder compares from the match |
| 519 | /// candidates when looking for the best match. Once a match of at least |
| 520 | /// `nice_len` bytes long is found, the encoder stops looking for better |
| 521 | /// candidates and encodes the match. (Naturally, if the found match is |
| 522 | /// actually longer than `nice_len`, the actual length is encoded; it's not |
| 523 | /// truncated to `nice_len`.) |
| 524 | /// |
| 525 | /// Bigger values usually increase the compression ratio and compression |
| 526 | /// time. For most files, 32 to 128 is a good value, which gives very good |
| 527 | /// compression ratio at good speed. |
| 528 | /// |
| 529 | /// The exact minimum value depends on the match finder. The maximum is 273, |
| 530 | /// which is the maximum length of a match that LZMA1 and LZMA2 can encode. |
| 531 | pub fn nice_len(&mut self, len: u32) -> &mut LzmaOptions { |
| 532 | self.raw.nice_len = len; |
| 533 | self |
| 534 | } |
| 535 | |
| 536 | /// Configures the match finder ID. |
| 537 | pub fn match_finder(&mut self, mf: MatchFinder) -> &mut LzmaOptions { |
| 538 | self.raw.mf = mf as lzma_sys::lzma_match_finder; |
| 539 | self |
| 540 | } |
| 541 | |
| 542 | /// Maximum search depth in the match finder. |
| 543 | /// |
| 544 | /// For every input byte, match finder searches through the hash chain or |
| 545 | /// binary tree in a loop, each iteration going one step deeper in the chain |
| 546 | /// or tree. The searching stops if |
| 547 | /// |
| 548 | /// - a match of at least `nice_len` bytes long is found; |
| 549 | /// - all match candidates from the hash chain or binary tree have |
| 550 | /// been checked; or |
| 551 | /// - maximum search depth is reached. |
| 552 | /// |
| 553 | /// Maximum search depth is needed to prevent the match finder from wasting |
| 554 | /// too much time in case there are lots of short match candidates. On the |
| 555 | /// other hand, stopping the search before all candidates have been checked |
| 556 | /// can reduce compression ratio. |
| 557 | /// |
| 558 | /// Setting depth to zero tells liblzma to use an automatic default value, |
| 559 | /// that depends on the selected match finder and nice_len. The default is |
| 560 | /// in the range [4, 200] or so (it may vary between liblzma versions). |
| 561 | /// |
| 562 | /// Using a bigger depth value than the default can increase compression |
| 563 | /// ratio in some cases. There is no strict maximum value, but high values |
| 564 | /// (thousands or millions) should be used with care: the encoder could |
| 565 | /// remain fast enough with typical input, but malicious input could cause |
| 566 | /// the match finder to slow down dramatically, possibly creating a denial |
| 567 | /// of service attack. |
| 568 | pub fn depth(&mut self, depth: u32) -> &mut LzmaOptions { |
| 569 | self.raw.depth = depth; |
| 570 | self |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | impl Check { |
| 575 | /// Test if this check is supported in this build of liblzma. |
| 576 | pub fn is_supported(&self) -> bool { |
| 577 | let ret: u8 = unsafe { lzma_sys::lzma_check_is_supported(*self as lzma_sys::lzma_check) }; |
| 578 | ret != 0 |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | impl MatchFinder { |
| 583 | /// Test if this match finder is supported in this build of liblzma. |
| 584 | pub fn is_supported(&self) -> bool { |
| 585 | let ret: u8 = unsafe { lzma_sys::lzma_mf_is_supported(*self as lzma_sys::lzma_match_finder) }; |
| 586 | ret != 0 |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | impl Filters { |
| 591 | /// Creates a new filter chain with no filters. |
| 592 | pub fn new() -> Filters { |
| 593 | Filters { |
| 594 | inner: vec![lzma_sys::lzma_filter { |
| 595 | id: lzma_sys::LZMA_VLI_UNKNOWN, |
| 596 | options: 0 as *mut _, |
| 597 | }], |
| 598 | lzma_opts: LinkedList::new(), |
| 599 | } |
| 600 | } |
| 601 | |
| 602 | /// Add an LZMA1 filter. |
| 603 | /// |
| 604 | /// LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, |
| 605 | /// 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from |
| 606 | /// accidentally using LZMA when they actually want LZMA2. |
| 607 | /// |
| 608 | /// LZMA1 shouldn't be used for new applications unless you _really_ know |
| 609 | /// what you are doing. LZMA2 is almost always a better choice. |
| 610 | pub fn lzma1(&mut self, opts: &LzmaOptions) -> &mut Filters { |
| 611 | self.lzma_opts.push_back(opts.raw); |
| 612 | let ptr = self.lzma_opts.back().unwrap() as *const _ as *mut _; |
| 613 | self.push(lzma_sys::lzma_filter { |
| 614 | id: lzma_sys::LZMA_FILTER_LZMA1, |
| 615 | options: ptr, |
| 616 | }) |
| 617 | } |
| 618 | |
| 619 | /// Add an LZMA2 filter. |
| 620 | /// |
| 621 | /// Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds |
| 622 | /// support for `SyncFlush`, uncompressed chunks (smaller expansion when |
| 623 | /// trying to compress uncompressible data), possibility to change |
| 624 | /// `literal_context_bits`/`literal_position_bits`/`position_bits` in the |
| 625 | /// middle of encoding, and some other internal improvements. |
| 626 | pub fn lzma2(&mut self, opts: &LzmaOptions) -> &mut Filters { |
| 627 | self.lzma_opts.push_back(opts.raw); |
| 628 | let ptr = self.lzma_opts.back().unwrap() as *const _ as *mut _; |
| 629 | self.push(lzma_sys::lzma_filter { |
| 630 | id: lzma_sys::LZMA_FILTER_LZMA2, |
| 631 | options: ptr, |
| 632 | }) |
| 633 | } |
| 634 | |
| 635 | // TODO: delta filter |
| 636 | |
| 637 | /// Add a filter for x86 binaries. |
| 638 | pub fn x86(&mut self) -> &mut Filters { |
| 639 | self.push(lzma_sys::lzma_filter { |
| 640 | id: lzma_sys::LZMA_FILTER_X86, |
| 641 | options: 0 as *mut _, |
| 642 | }) |
| 643 | } |
| 644 | |
| 645 | /// Add a filter for PowerPC binaries. |
| 646 | pub fn powerpc(&mut self) -> &mut Filters { |
| 647 | self.push(lzma_sys::lzma_filter { |
| 648 | id: lzma_sys::LZMA_FILTER_POWERPC, |
| 649 | options: 0 as *mut _, |
| 650 | }) |
| 651 | } |
| 652 | |
| 653 | /// Add a filter for IA-64 (itanium) binaries. |
| 654 | pub fn ia64(&mut self) -> &mut Filters { |
| 655 | self.push(lzma_sys::lzma_filter { |
| 656 | id: lzma_sys::LZMA_FILTER_IA64, |
| 657 | options: 0 as *mut _, |
| 658 | }) |
| 659 | } |
| 660 | |
| 661 | /// Add a filter for ARM binaries. |
| 662 | pub fn arm(&mut self) -> &mut Filters { |
| 663 | self.push(lzma_sys::lzma_filter { |
| 664 | id: lzma_sys::LZMA_FILTER_ARM, |
| 665 | options: 0 as *mut _, |
| 666 | }) |
| 667 | } |
| 668 | |
| 669 | /// Add a filter for ARM-Thumb binaries. |
| 670 | pub fn arm_thumb(&mut self) -> &mut Filters { |
| 671 | self.push(lzma_sys::lzma_filter { |
| 672 | id: lzma_sys::LZMA_FILTER_ARMTHUMB, |
| 673 | options: 0 as *mut _, |
| 674 | }) |
| 675 | } |
| 676 | |
| 677 | /// Add a filter for SPARC binaries. |
| 678 | pub fn sparc(&mut self) -> &mut Filters { |
| 679 | self.push(lzma_sys::lzma_filter { |
| 680 | id: lzma_sys::LZMA_FILTER_SPARC, |
| 681 | options: 0 as *mut _, |
| 682 | }) |
| 683 | } |
| 684 | |
| 685 | fn push(&mut self, filter: lzma_sys::lzma_filter) -> &mut Filters { |
| 686 | let pos = self.inner.len() - 1; |
| 687 | self.inner.insert(pos, filter); |
| 688 | self |
| 689 | } |
| 690 | } |
| 691 | |
| 692 | impl MtStreamBuilder { |
| 693 | /// Creates a new blank builder to create a multithreaded encoding `Stream`. |
| 694 | pub fn new() -> MtStreamBuilder { |
| 695 | unsafe { |
| 696 | let mut init = MtStreamBuilder { |
| 697 | raw: mem::zeroed(), |
| 698 | filters: None, |
| 699 | }; |
| 700 | init.raw.threads = 1; |
| 701 | return init; |
| 702 | } |
| 703 | } |
| 704 | |
| 705 | /// Configures the number of worker threads to use |
| 706 | pub fn threads(&mut self, threads: u32) -> &mut Self { |
| 707 | self.raw.threads = threads; |
| 708 | self |
| 709 | } |
| 710 | |
| 711 | /// Configures the maximum uncompressed size of a block |
| 712 | /// |
| 713 | /// The encoder will start a new .xz block every `block_size` bytes. |
| 714 | /// Using `FullFlush` or `FullBarrier` with `process` the caller may tell |
| 715 | /// liblzma to start a new block earlier. |
| 716 | /// |
| 717 | /// With LZMA2, a recommended block size is 2-4 times the LZMA2 dictionary |
| 718 | /// size. With very small dictionaries, it is recommended to use at least 1 |
| 719 | /// MiB block size for good compression ratio, even if this is more than |
| 720 | /// four times the dictionary size. Note that these are only recommendations |
| 721 | /// for typical use cases; feel free to use other values. Just keep in mind |
| 722 | /// that using a block size less than the LZMA2 dictionary size is waste of |
| 723 | /// RAM. |
| 724 | /// |
| 725 | /// Set this to 0 to let liblzma choose the block size depending on the |
| 726 | /// compression options. For LZMA2 it will be 3*`dict_size` or 1 MiB, |
| 727 | /// whichever is more. |
| 728 | /// |
| 729 | /// For each thread, about 3 * `block_size` bytes of memory will be |
| 730 | /// allocated. This may change in later liblzma versions. If so, the memory |
| 731 | /// usage will probably be reduced, not increased. |
| 732 | pub fn block_size(&mut self, block_size: u64) -> &mut Self { |
| 733 | self.raw.block_size = block_size; |
| 734 | self |
| 735 | } |
| 736 | |
| 737 | /// Timeout to allow `process` to return early |
| 738 | /// |
| 739 | /// Multithreading can make liblzma to consume input and produce output in a |
| 740 | /// very bursty way: it may first read a lot of input to fill internal |
| 741 | /// buffers, then no input or output occurs for a while. |
| 742 | /// |
| 743 | /// In single-threaded mode, `process` won't return until it has either |
| 744 | /// consumed all the input or filled the output buffer. If this is done in |
| 745 | /// multithreaded mode, it may cause a call `process` to take even tens of |
| 746 | /// seconds, which isn't acceptable in all applications. |
| 747 | /// |
| 748 | /// To avoid very long blocking times in `process`, a timeout (in |
| 749 | /// milliseconds) may be set here. If `process would block longer than |
| 750 | /// this number of milliseconds, it will return with `Ok`. Reasonable |
| 751 | /// values are 100 ms or more. The xz command line tool uses 300 ms. |
| 752 | /// |
| 753 | /// If long blocking times are fine for you, set timeout to a special |
| 754 | /// value of 0, which will disable the timeout mechanism and will make |
| 755 | /// `process` block until all the input is consumed or the output |
| 756 | /// buffer has been filled. |
| 757 | pub fn timeout_ms(&mut self, timeout: u32) -> &mut Self { |
| 758 | self.raw.timeout = timeout; |
| 759 | self |
| 760 | } |
| 761 | |
| 762 | /// Compression preset (level and possible flags) |
| 763 | /// |
| 764 | /// The preset is set just like with `Stream::new_easy_encoder`. The preset |
| 765 | /// is ignored if filters below have been specified. |
| 766 | pub fn preset(&mut self, preset: u32) -> &mut Self { |
| 767 | self.raw.preset = preset; |
| 768 | self |
| 769 | } |
| 770 | |
| 771 | /// Configure a custom filter chain |
| 772 | pub fn filters(&mut self, filters: Filters) -> &mut Self { |
| 773 | self.raw.filters = filters.inner.as_ptr(); |
| 774 | self.filters = Some(filters); |
| 775 | self |
| 776 | } |
| 777 | |
| 778 | /// Configures the integrity check type |
| 779 | pub fn check(&mut self, check: Check) -> &mut Self { |
| 780 | self.raw.check = check as lzma_sys::lzma_check; |
| 781 | self |
| 782 | } |
| 783 | |
| 784 | /// Calculate approximate memory usage of multithreaded .xz encoder |
| 785 | pub fn memusage(&self) -> u64 { |
| 786 | unsafe { lzma_sys::lzma_stream_encoder_mt_memusage(&self.raw) } |
| 787 | } |
| 788 | |
| 789 | /// Initialize multithreaded .xz stream encoder. |
| 790 | pub fn encoder(&self) -> Result<Stream, Error> { |
| 791 | unsafe { |
| 792 | let mut init = Stream { raw: mem::zeroed() }; |
| 793 | cvt(lzma_sys::lzma_stream_encoder_mt(&mut init.raw, &self.raw))?; |
| 794 | Ok(init) |
| 795 | } |
| 796 | } |
| 797 | } |
| 798 | |
| 799 | fn cvt(rc: lzma_sys::lzma_ret) -> Result<Status, Error> { |
| 800 | match rc { |
| 801 | lzma_sys::LZMA_OK => Ok(Status::Ok), |
| 802 | lzma_sys::LZMA_STREAM_END => Ok(Status::StreamEnd), |
| 803 | lzma_sys::LZMA_NO_CHECK => Err(Error::NoCheck), |
| 804 | lzma_sys::LZMA_UNSUPPORTED_CHECK => Err(Error::UnsupportedCheck), |
| 805 | lzma_sys::LZMA_GET_CHECK => Ok(Status::GetCheck), |
| 806 | lzma_sys::LZMA_MEM_ERROR => Err(Error::Mem), |
| 807 | lzma_sys::LZMA_MEMLIMIT_ERROR => Err(Error::MemLimit), |
| 808 | lzma_sys::LZMA_FORMAT_ERROR => Err(Error::Format), |
| 809 | lzma_sys::LZMA_OPTIONS_ERROR => Err(Error::Options), |
| 810 | lzma_sys::LZMA_DATA_ERROR => Err(Error::Data), |
| 811 | lzma_sys::LZMA_BUF_ERROR => Ok(Status::MemNeeded), |
| 812 | lzma_sys::LZMA_PROG_ERROR => Err(Error::Program), |
| 813 | c: u32 => panic!("unknown return code: {}" , c), |
| 814 | } |
| 815 | } |
| 816 | |
| 817 | impl From<Error> for io::Error { |
| 818 | fn from(e: Error) -> io::Error { |
| 819 | let kind: ErrorKind = match e { |
| 820 | Error::Data => std::io::ErrorKind::InvalidData, |
| 821 | Error::Options => std::io::ErrorKind::InvalidInput, |
| 822 | Error::Format => std::io::ErrorKind::InvalidData, |
| 823 | Error::MemLimit => std::io::ErrorKind::Other, |
| 824 | Error::Mem => std::io::ErrorKind::Other, |
| 825 | Error::Program => std::io::ErrorKind::Other, |
| 826 | Error::NoCheck => std::io::ErrorKind::InvalidInput, |
| 827 | Error::UnsupportedCheck => std::io::ErrorKind::Other, |
| 828 | }; |
| 829 | |
| 830 | io::Error::new(kind, error:e) |
| 831 | } |
| 832 | } |
| 833 | |
| 834 | impl error::Error for Error {} |
| 835 | |
| 836 | impl fmt::Display for Error { |
| 837 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 838 | match&'static str self { |
| 839 | Error::Data => "lzma data error" , |
| 840 | Error::Options => "invalid options" , |
| 841 | Error::Format => "stream/file format not recognized" , |
| 842 | Error::MemLimit => "memory limit reached" , |
| 843 | Error::Mem => "can't allocate memory" , |
| 844 | Error::Program => "liblzma internal error" , |
| 845 | Error::NoCheck => "no integrity check was available" , |
| 846 | Error::UnsupportedCheck => "liblzma not built with check support" , |
| 847 | } |
| 848 | .fmt(f) |
| 849 | } |
| 850 | } |
| 851 | |
| 852 | impl Drop for Stream { |
| 853 | fn drop(&mut self) { |
| 854 | unsafe { |
| 855 | lzma_sys::lzma_end(&mut self.raw); |
| 856 | } |
| 857 | } |
| 858 | } |
| 859 | |