1// SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl>
2// RLE decompressor based on FBReader
3// SPDX-FileCopyrightText: 2004-2008 Geometer Plus <contact@geometerplus.com>
4// Huffdic decompressor based on Python code by Igor Skochinsky
5// SPDX-License-Identifier: GPL-2.0-or-later
6
7#include "decompressor.h"
8
9#include "bitreader_p.h"
10
11#include <QVector>
12#include <QtEndian>
13
14#include <vector>
15
16// clang-format off
// Classifies the leading byte of each RLE token; RLEDecompressor::decompress()
// switches on the looked-up value:
//   0 - the byte is emitted as-is
//   1 - the byte is a count of raw bytes that follow and are copied verbatim
//   2 - the byte expands to a space followed by (byte ^ 0x80)
//   3 - the byte starts a two-byte back-reference into the already produced output
static const unsigned char TOKEN_CODE[256] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x80 - 0x8F
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x90 - 0x9F
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xA0 - 0xAF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xB0 - 0xBF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 - 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0 - 0xDF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xE0 - 0xEF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xF0 - 0xFF
};
35// clang-format on
36
37namespace Mobipocket
38{
39
40class NOOPDecompressor : public Decompressor
41{
42public:
43 NOOPDecompressor()
44 {
45 valid = true;
46 }
47 QByteArray decompress(const QByteArray &data) override
48 {
49 return data;
50 }
51};
52
53class RLEDecompressor : public Decompressor
54{
55public:
56 RLEDecompressor()
57 {
58 valid = true;
59 }
60 QByteArray decompress(const QByteArray &data) override;
61};
62
63class HuffdicDecompressor : public Decompressor
64{
65public:
66 HuffdicDecompressor() = delete;
67 HuffdicDecompressor(const HuffdicDecompressor &) = delete;
68 HuffdicDecompressor(const QVector<QByteArray> &huffData);
69 QByteArray decompress(const QByteArray &data) override;
70
71private:
72 bool unpack(std::vector<char> &buf, BitReader reader, int depth) const;
73 const QVector<QByteArray> dicts;
74 quint32 entry_bits;
75 quint32 dict1[256];
76 quint32 dict2[64];
77};
78
79QByteArray RLEDecompressor::decompress(const QByteArray &data)
80{
81 QByteArray ret;
82 ret.reserve(asize: 8192);
83
84 int i = 0;
85 int maxIndex = data.size() - 1;
86
87 while (i < data.size()) {
88 unsigned char token = data.at(i: i++);
89 switch (TOKEN_CODE[token]) {
90 case 0:
91 ret.append(c: token);
92 break;
93 case 1:
94 if ((i + token > maxIndex + 1)) {
95 return ret;
96 }
97 ret.append(a: data.mid(index: i, len: token));
98 i += token;
99 break;
100 case 2:
101 ret.append(c: ' ');
102 ret.append(c: token ^ 0x80);
103 break;
104 case 3:
105 {
106 if (i > maxIndex) {
107 return ret;
108 }
109 quint16 N = token << 8;
110 N += (unsigned char)data.at(i: i++);
111 quint16 copyLength = (N & 7) + 3;
112 quint16 shift = (N & 0x3fff) / 8;
113 if ((shift < 1) || (shift > ret.size())) {
114 return ret;
115 }
116 auto shifted = ret.size() - shift;
117 for (auto j = shifted; j < shifted + copyLength; j++) {
118 ret.append(c: ret.at(i: j));
119 }
120 }
121 break;
122 }
123 }
124 return ret;
125}
126
127HuffdicDecompressor::HuffdicDecompressor(const QVector<QByteArray> &huffData)
128 : dicts(huffData.mid(pos: 1))
129{
130 if (dicts.empty())
131 return;
132
133 if ((dicts[0].size() < 18) || !dicts[0].startsWith(bv: "CDIC"))
134 return;
135
136 const QByteArray &huff1 = huffData[0];
137 if ((huff1.size() < 24) || !huff1.startsWith(bv: "HUFF"))
138 return;
139
140 quint32 off1 = qFromBigEndian<quint32>(src: huff1.constData() + 16);
141 quint32 off2 = qFromBigEndian<quint32>(src: huff1.constData() + 20);
142 if (((off1 + 256 * 4) > huff1.size()) || ((off2 + 64 * 4) > huff1.size()))
143 return;
144
145 memcpy(dest: dict1, src: huff1.data() + off1, n: 256 * 4);
146 memcpy(dest: dict2, src: huff1.data() + off2, n: 64 * 4);
147
148 entry_bits = qFromBigEndian<quint32>(src: dicts[0].constData() + 12);
149 if (entry_bits > 32)
150 return;
151
152 valid = true;
153}
154
155QByteArray HuffdicDecompressor::decompress(const QByteArray &data)
156{
157 std::vector<char> buf;
158 buf.reserve(n: 4096);
159 if (!unpack(buf, reader: BitReader(data), depth: 0)) {
160 valid = false;
161 }
162 return QByteArray(buf.data(), buf.size());
163}
164
165bool HuffdicDecompressor::unpack(std::vector<char> &buf, BitReader reader, int depth) const
166{
167 // These two checks are fairly arbitrary, due to lack of an actual specification
168 // Both exceed typical real world files by far, but are useful to protect against
169 // 'ZIP bomb' style attacks
170 if (depth > 32) {
171 return false;
172 } else if (buf.size() > 16 * 1024 * 1024) {
173 return false;
174 }
175
176 auto dict_count = dicts.size();
177 quint32 entry_mask = (quint64(1) << entry_bits) - 1;
178
179 while (reader.left()) {
180 quint32 dw = reader.read();
181 quint32 v = dict1[dw >> 24];
182 quint8 codelen = v & 0x1F;
183 if (!codelen)
184 return false;
185 quint32 code = dw >> (32 - codelen);
186 quint32 r = (v >> 8);
187 if (!(v & 0x80)) {
188 while (code < dict2[(codelen - 1) * 2]) {
189 codelen++;
190 code = dw >> (32 - codelen);
191 }
192 r = dict2[(codelen - 1) * 2 + 1];
193 }
194 r -= code;
195 if (!reader.eat(n: codelen))
196 return true;
197 quint32 dict_no = quint64(r) >> entry_bits;
198 if (dict_no >= dict_count) {
199 return false;
200 }
201 QByteArrayView dict = dicts.at(i: dict_no);
202 auto dict_size = dict.size();
203
204 quint32 off1 = 16 + (r & entry_mask) * 2;
205 if (off1 > (dict_size - 2)) {
206 return false;
207 }
208
209 quint16 off2 = 16 + qFromBigEndian<quint16>(src: dict.constData() + off1);
210 if (off2 > (dict_size - 2)) {
211 return false;
212 }
213
214 quint16 blen = qFromBigEndian<quint16>(src: dict.constData() + off2);
215 if ((blen & 0x7fff) > (dict_size - 2 - off2)) {
216 return false;
217 }
218
219 auto slice = dict.mid(pos: off2 + 2, n: (blen & 0x7fff));
220 if (blen & 0x8000) {
221 buf.insert(position: buf.end(), first: slice.begin(), last: slice.end());
222 } else {
223 if (!unpack(buf, reader: BitReader(slice), depth: depth + 1)) {
224 return false;
225 }
226 }
227 }
228 return true;
229}
230
231std::unique_ptr<Decompressor> Decompressor::create(quint8 type, const QVector<QByteArray> &auxData)
232{
233 switch (type) {
234 case 1:
235 return std::make_unique<NOOPDecompressor>();
236 case 2:
237 return std::make_unique<RLEDecompressor>();
238 case 'H':
239 return std::make_unique<HuffdicDecompressor>(args: auxData);
240 default:
241 return nullptr;
242 }
243}
244}
245

// Source code of kdegraphics-mobipocket/lib/decompressor.cpp