1 | // SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl> |
2 | // RLE decompressor based on FBReader |
3 | // SPDX-FileCopyrightText: 2004-2008 Geometer Plus <contact@geometerplus.com> |
4 | // Huffdic decompressor based on Python code by Igor Skochinsky |
5 | // SPDX-License-Identifier: GPL-2.0-or-later |
6 | |
7 | #include "decompressor.h" |
8 | |
9 | #include "bitreader_p.h" |
10 | |
11 | #include <QVector> |
12 | #include <QtEndian> |
13 | |
14 | #include <vector> |
15 | |
16 | // clang-format off |
17 | static const unsigned char TOKEN_CODE[256] = { |
18 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, |
19 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
20 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
23 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
26 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
27 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
28 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
29 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
30 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
31 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
32 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
33 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
34 | }; |
35 | // clang-format on |
36 | |
37 | namespace Mobipocket |
38 | { |
39 | |
40 | class NOOPDecompressor : public Decompressor |
41 | { |
42 | public: |
43 | NOOPDecompressor() |
44 | { |
45 | valid = true; |
46 | } |
47 | QByteArray decompress(const QByteArray &data) override |
48 | { |
49 | return data; |
50 | } |
51 | }; |
52 | |
53 | class RLEDecompressor : public Decompressor |
54 | { |
55 | public: |
56 | RLEDecompressor() |
57 | { |
58 | valid = true; |
59 | } |
60 | QByteArray decompress(const QByteArray &data) override; |
61 | }; |
62 | |
63 | class HuffdicDecompressor : public Decompressor |
64 | { |
65 | public: |
66 | HuffdicDecompressor() = delete; |
67 | HuffdicDecompressor(const HuffdicDecompressor &) = delete; |
68 | HuffdicDecompressor(const QVector<QByteArray> &huffData); |
69 | QByteArray decompress(const QByteArray &data) override; |
70 | |
71 | private: |
72 | bool unpack(std::vector<char> &buf, BitReader reader, int depth) const; |
73 | const QVector<QByteArray> dicts; |
74 | quint32 entry_bits; |
75 | quint32 dict1[256]; |
76 | quint32 dict2[64]; |
77 | }; |
78 | |
79 | QByteArray RLEDecompressor::decompress(const QByteArray &data) |
80 | { |
81 | QByteArray ret; |
82 | ret.reserve(asize: 8192); |
83 | |
84 | int i = 0; |
85 | int maxIndex = data.size() - 1; |
86 | |
87 | while (i < data.size()) { |
88 | unsigned char token = data.at(i: i++); |
89 | switch (TOKEN_CODE[token]) { |
90 | case 0: |
91 | ret.append(c: token); |
92 | break; |
93 | case 1: |
94 | if ((i + token > maxIndex + 1)) { |
95 | return ret; |
96 | } |
97 | ret.append(a: data.mid(index: i, len: token)); |
98 | i += token; |
99 | break; |
100 | case 2: |
101 | ret.append(c: ' '); |
102 | ret.append(c: token ^ 0x80); |
103 | break; |
104 | case 3: |
105 | { |
106 | if (i > maxIndex) { |
107 | return ret; |
108 | } |
109 | quint16 N = token << 8; |
110 | N += (unsigned char)data.at(i: i++); |
111 | quint16 copyLength = (N & 7) + 3; |
112 | quint16 shift = (N & 0x3fff) / 8; |
113 | if ((shift < 1) || (shift > ret.size())) { |
114 | return ret; |
115 | } |
116 | auto shifted = ret.size() - shift; |
117 | for (auto j = shifted; j < shifted + copyLength; j++) { |
118 | ret.append(c: ret.at(i: j)); |
119 | } |
120 | } |
121 | break; |
122 | } |
123 | } |
124 | return ret; |
125 | } |
126 | |
127 | HuffdicDecompressor::HuffdicDecompressor(const QVector<QByteArray> &huffData) |
128 | : dicts(huffData.mid(pos: 1)) |
129 | { |
130 | if (dicts.empty()) |
131 | return; |
132 | |
133 | if ((dicts[0].size() < 18) || !dicts[0].startsWith(bv: "CDIC" )) |
134 | return; |
135 | |
136 | const QByteArray &huff1 = huffData[0]; |
137 | if ((huff1.size() < 24) || !huff1.startsWith(bv: "HUFF" )) |
138 | return; |
139 | |
140 | quint32 off1 = qFromBigEndian<quint32>(src: huff1.constData() + 16); |
141 | quint32 off2 = qFromBigEndian<quint32>(src: huff1.constData() + 20); |
142 | if (((off1 + 256 * 4) > huff1.size()) || ((off2 + 64 * 4) > huff1.size())) |
143 | return; |
144 | |
145 | memcpy(dest: dict1, src: huff1.data() + off1, n: 256 * 4); |
146 | memcpy(dest: dict2, src: huff1.data() + off2, n: 64 * 4); |
147 | |
148 | entry_bits = qFromBigEndian<quint32>(src: dicts[0].constData() + 12); |
149 | if (entry_bits > 32) |
150 | return; |
151 | |
152 | valid = true; |
153 | } |
154 | |
155 | QByteArray HuffdicDecompressor::decompress(const QByteArray &data) |
156 | { |
157 | std::vector<char> buf; |
158 | buf.reserve(n: 4096); |
159 | if (!unpack(buf, reader: BitReader(data), depth: 0)) { |
160 | valid = false; |
161 | } |
162 | return QByteArray(buf.data(), buf.size()); |
163 | } |
164 | |
165 | bool HuffdicDecompressor::unpack(std::vector<char> &buf, BitReader reader, int depth) const |
166 | { |
167 | // These two checks are fairly arbitrary, due to lack of an actual specification |
168 | // Both exceed typical real world files by far, but are useful to protect against |
169 | // 'ZIP bomb' style attacks |
170 | if (depth > 32) { |
171 | return false; |
172 | } else if (buf.size() > 16 * 1024 * 1024) { |
173 | return false; |
174 | } |
175 | |
176 | auto dict_count = dicts.size(); |
177 | quint32 entry_mask = (quint64(1) << entry_bits) - 1; |
178 | |
179 | while (reader.left()) { |
180 | quint32 dw = reader.read(); |
181 | quint32 v = dict1[dw >> 24]; |
182 | quint8 codelen = v & 0x1F; |
183 | if (!codelen) |
184 | return false; |
185 | quint32 code = dw >> (32 - codelen); |
186 | quint32 r = (v >> 8); |
187 | if (!(v & 0x80)) { |
188 | while (code < dict2[(codelen - 1) * 2]) { |
189 | codelen++; |
190 | code = dw >> (32 - codelen); |
191 | } |
192 | r = dict2[(codelen - 1) * 2 + 1]; |
193 | } |
194 | r -= code; |
195 | if (!reader.eat(n: codelen)) |
196 | return true; |
197 | quint32 dict_no = quint64(r) >> entry_bits; |
198 | if (dict_no >= dict_count) { |
199 | return false; |
200 | } |
201 | QByteArrayView dict = dicts.at(i: dict_no); |
202 | auto dict_size = dict.size(); |
203 | |
204 | quint32 off1 = 16 + (r & entry_mask) * 2; |
205 | if (off1 > (dict_size - 2)) { |
206 | return false; |
207 | } |
208 | |
209 | quint16 off2 = 16 + qFromBigEndian<quint16>(src: dict.constData() + off1); |
210 | if (off2 > (dict_size - 2)) { |
211 | return false; |
212 | } |
213 | |
214 | quint16 blen = qFromBigEndian<quint16>(src: dict.constData() + off2); |
215 | if ((blen & 0x7fff) > (dict_size - 2 - off2)) { |
216 | return false; |
217 | } |
218 | |
219 | auto slice = dict.mid(pos: off2 + 2, n: (blen & 0x7fff)); |
220 | if (blen & 0x8000) { |
221 | buf.insert(position: buf.end(), first: slice.begin(), last: slice.end()); |
222 | } else { |
223 | if (!unpack(buf, reader: BitReader(slice), depth: depth + 1)) { |
224 | return false; |
225 | } |
226 | } |
227 | } |
228 | return true; |
229 | } |
230 | |
231 | std::unique_ptr<Decompressor> Decompressor::create(quint8 type, const QVector<QByteArray> &auxData) |
232 | { |
233 | switch (type) { |
234 | case 1: |
235 | return std::make_unique<NOOPDecompressor>(); |
236 | case 2: |
237 | return std::make_unique<RLEDecompressor>(); |
238 | case 'H': |
239 | return std::make_unique<HuffdicDecompressor>(args: auxData); |
240 | default: |
241 | return nullptr; |
242 | } |
243 | } |
244 | } |
245 | |