| 1 | /* This file is part of the KDE libraries |
| 2 | SPDX-FileCopyrightText: 2025 Azhar Momin <azhar.momin@kdemail.net> |
| 3 | |
| 4 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 5 | */ |
| 6 | |
| 7 | #include "klzfilter.h" |
| 8 | #include "loggingcategory.h" |
| 9 | |
| 10 | #include <QDebug> |
| 11 | #include <QIODevice> |
| 12 | #include <qloggingcategory.h> |
| 13 | |
| 14 | #if HAVE_XZ_SUPPORT |
| 15 | |
| 16 | extern "C" { |
| 17 | #include <lzma.h> |
| 18 | } |
| 19 | |
| 20 | #if LZMA_VERSION_MAJOR > 5 || (LZMA_VERSION_MAJOR == 5 && LZMA_VERSION_MINOR >= 4) |
| 21 | #define LZMA_LZIP_DECODER_AVAILABLE 1 |
| 22 | #endif |
| 23 | |
| 24 | #ifndef LZMA_LZIP_DECODER_AVAILABLE |
// Bounds a dictionary size parsed from an LZIP header must fall within.
static constexpr uint32_t minDictSize = uint32_t{1} << 12; // 4 KiB
static constexpr uint32_t maxDictSize = uint32_t{1} << 29; // 512 MiB
| 27 | #endif |
| 28 | |
// Default dictionary size (8 MiB)
// The encoded form is the base-2 exponent written into the LZIP header;
// the decoded form is the resulting size in bytes (2^23 = 8 MiB).
static constexpr uint8_t defaultEncodedDictSize = 23;
static constexpr uint32_t defaultDecodedDictSize = uint32_t{1} << defaultEncodedDictSize;
| 32 | |
| 33 | class Q_DECL_HIDDEN KLzFilter::Private |
| 34 | { |
| 35 | public: |
| 36 | Private() |
| 37 | : zStream(LZMA_STREAM_INIT) |
| 38 | , mode(0) |
| 39 | , isInitialized(false) |
| 40 | { |
| 41 | } |
| 42 | |
| 43 | lzma_stream zStream; |
| 44 | |
| 45 | uint32_t decodedDictSize; |
| 46 | uint32_t crc32; |
| 47 | |
| 48 | int mode; |
| 49 | bool isInitialized; |
| 50 | }; |
| 51 | |
| 52 | KLzFilter::KLzFilter() |
| 53 | : d(new Private) |
| 54 | { |
| 55 | } |
| 56 | |
| 57 | KLzFilter::~KLzFilter() |
| 58 | { |
| 59 | } |
| 60 | |
| 61 | bool KLzFilter::init(int mode) |
| 62 | { |
| 63 | if (d->isInitialized) { |
| 64 | terminate(); |
| 65 | } |
| 66 | |
| 67 | d->zStream.next_in = nullptr; |
| 68 | d->zStream.avail_in = 0; |
| 69 | |
| 70 | d->crc32 = 0; |
| 71 | d->decodedDictSize = defaultDecodedDictSize; |
| 72 | |
| 73 | lzma_ret result; |
| 74 | |
| 75 | if (mode == QIODevice::ReadOnly) { |
| 76 | #ifdef LZMA_LZIP_DECODER_AVAILABLE |
| 77 | result = lzma_lzip_decoder(&d->zStream, 100 << 20, 0); |
| 78 | if (result != LZMA_OK) { |
| 79 | qCWarning(KArchiveLog) << "lzma_lzip_decoder returned" << result; |
| 80 | return false; |
| 81 | } |
| 82 | d->isInitialized = true; |
| 83 | #else |
| 84 | // We cannot initialize lzma_raw_decoder here because we |
| 85 | // need to read the header first to extract the dictionary size. |
| 86 | #endif |
| 87 | } else if (mode == QIODevice::WriteOnly) { |
| 88 | lzma_options_lzma lzma_opt; |
| 89 | lzma_lzma_preset(options: &lzma_opt, LZMA_PRESET_DEFAULT); |
| 90 | lzma_opt.dict_size = defaultDecodedDictSize; |
| 91 | |
| 92 | lzma_filter filters[2]; |
| 93 | filters[0].id = LZMA_FILTER_LZMA1; |
| 94 | filters[0].options = &lzma_opt; |
| 95 | filters[1].id = LZMA_VLI_UNKNOWN; |
| 96 | filters[1].options = nullptr; |
| 97 | |
| 98 | result = lzma_raw_encoder(strm: &d->zStream, filters); |
| 99 | if (result != LZMA_OK) { |
| 100 | qCWarning(KArchiveLog) << "lzma_raw_encoder returned" << result; |
| 101 | return false; |
| 102 | } |
| 103 | d->isInitialized = true; |
| 104 | } else { |
| 105 | return false; |
| 106 | } |
| 107 | d->mode = mode; |
| 108 | return true; |
| 109 | } |
| 110 | |
| 111 | int KLzFilter::mode() const |
| 112 | { |
| 113 | return d->mode; |
| 114 | } |
| 115 | |
| 116 | bool KLzFilter::terminate() |
| 117 | { |
| 118 | if (d->mode != QIODevice::ReadOnly && d->mode != QIODevice::WriteOnly) { |
| 119 | return false; |
| 120 | } |
| 121 | |
| 122 | if (d->isInitialized) { |
| 123 | lzma_end(strm: &d->zStream); |
| 124 | } |
| 125 | |
| 126 | d->isInitialized = false; |
| 127 | return true; |
| 128 | } |
| 129 | |
| 130 | void KLzFilter::reset() |
| 131 | { |
| 132 | terminate(); |
| 133 | init(mode: d->mode); |
| 134 | } |
| 135 | |
| 136 | void KLzFilter::setOutBuffer(char *data, uint maxlen) |
| 137 | { |
| 138 | d->zStream.avail_out = maxlen; |
| 139 | d->zStream.next_out = (uint8_t *)data; |
| 140 | } |
| 141 | |
| 142 | void KLzFilter::setInBuffer(const char *data, unsigned int size) |
| 143 | { |
| 144 | d->zStream.avail_in = size; |
| 145 | d->zStream.next_in = (uint8_t *)const_cast<char *>(data); |
| 146 | } |
| 147 | |
| 148 | int KLzFilter::inBufferAvailable() const |
| 149 | { |
| 150 | return d->zStream.avail_in; |
| 151 | } |
| 152 | |
| 153 | int KLzFilter::outBufferAvailable() const |
| 154 | { |
| 155 | return d->zStream.avail_out; |
| 156 | } |
| 157 | |
| 158 | #ifndef LZMA_LZIP_DECODER_AVAILABLE |
// Decodes a little-endian 32-bit unsigned integer from the first 4 bytes of buffer.
static uint32_t parseUi32(const uint8_t *buffer)
{
    const uint32_t b0 = buffer[0];
    const uint32_t b1 = buffer[1];
    const uint32_t b2 = buffer[2];
    const uint32_t b3 = buffer[3];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
| 167 | |
// Decodes a little-endian 64-bit unsigned integer from the first 8 bytes of buffer.
static uint64_t parseUi64(const uint8_t *buffer)
{
    uint64_t value = 0;
    for (int i = 0; i < 8; ++i) {
        // Widen before shifting so the upper bytes are not lost.
        value |= static_cast<uint64_t>(buffer[i]) << (i * 8);
    }
    return value;
}
| 172 | #endif |
| 173 | |
// Stores value into the first 4 bytes of buffer, least significant byte first.
static void putUi32(uint8_t *buffer, uint32_t value)
{
    for (unsigned shift = 0; shift < 32; shift += 8) {
        *buffer++ = static_cast<uint8_t>(value >> shift);
    }
}
| 181 | |
// Stores value into the first 8 bytes of buffer, least significant byte first.
static void putUi64(uint8_t *buffer, uint64_t value)
{
    for (unsigned shift = 0; shift < 64; shift += 8) {
        *buffer++ = static_cast<uint8_t>(value >> shift);
    }
}
| 189 | |
| 190 | bool KLzFilter::() |
| 191 | { |
| 192 | if (d->mode != QIODevice::ReadOnly) { |
| 193 | return false; |
| 194 | } |
| 195 | |
| 196 | #ifndef LZMA_LZIP_DECODER_AVAILABLE |
| 197 | if (d->zStream.avail_in < 6) { |
| 198 | qCWarning(KArchiveLog) << "Not enough data to read LZIP header" ; |
| 199 | return false; |
| 200 | } |
| 201 | |
| 202 | const uint8_t * = d->zStream.next_in; |
| 203 | |
| 204 | // check the lzip magic + version (should be 1) |
| 205 | if (memcmp(s1: header, s2: "LZIP\x01" , n: 5) != 0) { |
| 206 | qCWarning(KArchiveLog) << "Invalid LZIP header or unsupported version" ; |
| 207 | return false; |
| 208 | } |
| 209 | |
| 210 | uint32_t dictSize = 1 << (header[5] & 0x1F); |
| 211 | if (dictSize > minDictSize) { |
| 212 | dictSize -= (dictSize / 16) * ((header[5] >> 5) & 7); |
| 213 | } |
| 214 | |
| 215 | if (dictSize < minDictSize || dictSize > maxDictSize) { |
| 216 | qCWarning(KArchiveLog) << "Invalid LZIP dictSize:" << dictSize; |
| 217 | return false; |
| 218 | } |
| 219 | |
| 220 | d->decodedDictSize = dictSize; |
| 221 | |
| 222 | d->zStream.next_in += 6; |
| 223 | d->zStream.avail_in -= 6; |
| 224 | #endif |
| 225 | |
| 226 | return true; |
| 227 | } |
| 228 | |
| 229 | bool KLzFilter::readTrailer() |
| 230 | { |
| 231 | if (d->mode != QIODevice::ReadOnly) { |
| 232 | return false; |
| 233 | } |
| 234 | |
| 235 | #ifndef LZMA_LZIP_DECODER_AVAILABLE |
| 236 | if (d->zStream.avail_in < 20) { |
| 237 | qCWarning(KArchiveLog) << "Not enough data to read LZIP header" ; |
| 238 | return false; |
| 239 | } |
| 240 | |
| 241 | uint64_t actualDataSize = d->zStream.total_out; // total uncompressed data |
| 242 | uint64_t actualMemberSize = d->zStream.total_in + 26; // header (6) + data + trailer (20) |
| 243 | |
| 244 | const uint8_t *trailer = d->zStream.next_in; |
| 245 | |
| 246 | const uint32_t crc32 = parseUi32(buffer: trailer); |
| 247 | if (crc32 != d->crc32) { |
| 248 | qCWarning(KArchiveLog) << "Invalid LZIP CRC32:" << crc32; |
| 249 | return false; |
| 250 | } |
| 251 | |
| 252 | const uint64_t dataSize = parseUi64(buffer: trailer + 4); |
| 253 | if (dataSize != actualDataSize) { |
| 254 | qCWarning(KArchiveLog) << "Invalid LZIP dataSize:" << dataSize; |
| 255 | return false; |
| 256 | } |
| 257 | |
| 258 | const uint64_t memberSize = parseUi64(buffer: trailer + 12); |
| 259 | if (memberSize != actualMemberSize) { |
| 260 | qCWarning(KArchiveLog) << "Invalid LZIP memberSize:" << memberSize; |
| 261 | return false; |
| 262 | } |
| 263 | |
| 264 | d->zStream.next_in += 20; |
| 265 | d->zStream.avail_in -= 20; |
| 266 | #endif |
| 267 | |
| 268 | return true; |
| 269 | } |
| 270 | |
| 271 | bool KLzFilter::(const QByteArray &) |
| 272 | { |
| 273 | if (d->mode != QIODevice::WriteOnly) { |
| 274 | return false; |
| 275 | } |
| 276 | |
| 277 | if (d->zStream.avail_out < 6) { |
| 278 | qCWarning(KArchiveLog) << "Not enough space to write LZIP header" ; |
| 279 | return false; |
| 280 | } |
| 281 | |
| 282 | d->zStream.next_out[0] = 'L'; |
| 283 | d->zStream.next_out[1] = 'Z'; |
| 284 | d->zStream.next_out[2] = 'I'; |
| 285 | d->zStream.next_out[3] = 'P'; |
| 286 | d->zStream.next_out[4] = 1; // version = 1 |
| 287 | d->zStream.next_out[5] = defaultEncodedDictSize; |
| 288 | |
| 289 | d->zStream.next_out += 6; |
| 290 | d->zStream.avail_out -= 6; |
| 291 | |
| 292 | return true; |
| 293 | } |
| 294 | |
| 295 | bool KLzFilter::writeTrailer() |
| 296 | { |
| 297 | if (d->mode != QIODevice::WriteOnly) { |
| 298 | return false; |
| 299 | } |
| 300 | |
| 301 | if (d->zStream.avail_out < 20) { |
| 302 | qCWarning(KArchiveLog) << "Not enough space to write LZIP trailer" ; |
| 303 | return false; |
| 304 | } |
| 305 | |
| 306 | uint64_t dataSize = d->zStream.total_in; // total uncompressed data |
| 307 | uint64_t memberSize = d->zStream.total_out + 26; // header (6) + data + trailer (20) |
| 308 | |
| 309 | putUi32(buffer: d->zStream.next_out, value: d->crc32); |
| 310 | putUi64(buffer: d->zStream.next_out + 4, value: dataSize); |
| 311 | putUi64(buffer: d->zStream.next_out + 12, value: memberSize); |
| 312 | |
| 313 | d->zStream.next_out += 20; |
| 314 | d->zStream.avail_out -= 20; |
| 315 | |
| 316 | return true; |
| 317 | } |
| 318 | |
| 319 | KLzFilter::Result KLzFilter::uncompress() |
| 320 | { |
| 321 | #ifdef LZMA_LZIP_DECODER_AVAILABLE |
| 322 | lzma_ret result = lzma_code(&d->zStream, LZMA_RUN); |
| 323 | #else |
| 324 | lzma_ret result; |
| 325 | if (!d->isInitialized) { |
| 326 | lzma_options_lzma lzma_opt; |
| 327 | lzma_lzma_preset(options: &lzma_opt, LZMA_PRESET_DEFAULT); |
| 328 | lzma_opt.dict_size = d->decodedDictSize; |
| 329 | |
| 330 | lzma_filter filters[2]; |
| 331 | filters[0].id = LZMA_FILTER_LZMA1; |
| 332 | filters[0].options = &lzma_opt; |
| 333 | filters[1].id = LZMA_VLI_UNKNOWN; |
| 334 | filters[1].options = nullptr; |
| 335 | |
| 336 | result = lzma_raw_decoder(strm: &d->zStream, filters); |
| 337 | if (result != LZMA_OK) { |
| 338 | qCWarning(KArchiveLog) << "lzma_raw_decoder returned" << result; |
| 339 | return KFilterBase::Error; |
| 340 | } |
| 341 | |
| 342 | d->isInitialized = true; |
| 343 | } |
| 344 | |
| 345 | size_t prevAvailOut = d->zStream.avail_out; |
| 346 | result = lzma_code(strm: &d->zStream, action: LZMA_RUN); |
| 347 | size_t written = prevAvailOut - d->zStream.avail_out; |
| 348 | |
| 349 | if (written > 0) { |
| 350 | d->crc32 = lzma_crc32(buf: d->zStream.next_out - written, size: written, crc: d->crc32); |
| 351 | } |
| 352 | #endif |
| 353 | |
| 354 | switch (result) { |
| 355 | case LZMA_OK: |
| 356 | return KFilterBase::Ok; |
| 357 | case LZMA_STREAM_END: |
| 358 | if (!readTrailer()) { |
| 359 | return KFilterBase::Error; |
| 360 | } |
| 361 | return KFilterBase::End; |
| 362 | default: |
| 363 | qCWarning(KArchiveLog) << "lzma_code returned" << result; |
| 364 | return KFilterBase::Error; |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | KLzFilter::Result KLzFilter::compress(bool finish) |
| 369 | { |
| 370 | size_t prevAvailIn = d->zStream.avail_in; |
| 371 | lzma_ret result = lzma_code(strm: &d->zStream, action: finish ? LZMA_FINISH : LZMA_RUN); |
| 372 | size_t read = prevAvailIn - d->zStream.avail_in; |
| 373 | |
| 374 | if (read > 0) { |
| 375 | d->crc32 = lzma_crc32(buf: d->zStream.next_in - read, size: read, crc: d->crc32); |
| 376 | } |
| 377 | |
| 378 | switch (result) { |
| 379 | case LZMA_OK: |
| 380 | return KFilterBase::Ok; |
| 381 | case LZMA_STREAM_END: |
| 382 | if (finish && !writeTrailer()) { |
| 383 | return KFilterBase::Error; |
| 384 | } |
| 385 | return KFilterBase::End; |
| 386 | default: |
| 387 | qCDebug(KArchiveLog) << " lzma_code returned " << result; |
| 388 | return KFilterBase::Error; |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | #endif |
| 393 | |