decompressor.cpp source code [kdegraphics-mobipocket/lib/decompressor.cpp]

1	// SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl>
2	// RLE decompressor based on FBReader
3	// SPDX-FileCopyrightText: 2004-2008 Geometer Plus <contact@geometerplus.com>
4	// Huffdic decompressor based on Python code by Igor Skochinsky
5	// SPDX-License-Identifier: GPL-2.0-or-later
6
7	#include "decompressor.h"
8
9	#include "bitreader_p.h"
10
11	#include <QVector>
12	#include <QtEndian>
13
14	#include <vector>
15
16	// clang-format off
17	static const unsigned char TOKEN_CODE[`256`] = {
18	`0`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
19	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
20	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
21	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
22	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
23	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
24	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
25	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
26	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
27	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
28	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
29	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
30	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
31	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
32	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
33	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
34	};
35	// clang-format on
36
37	namespace Mobipocket
38	{
39
40	class NOOPDecompressor : public Decompressor
41	{
42	public:
43	NOOPDecompressor()
44	{
45	valid = true;
46	}
47	QByteArray decompress(const QByteArray &data) override
48	{
49	return data;
50	}
51	};
52
53	class RLEDecompressor : public Decompressor
54	{
55	public:
56	RLEDecompressor()
57	{
58	valid = true;
59	}
60	QByteArray decompress(const QByteArray &data) override;
61	};
62
63	class HuffdicDecompressor : public Decompressor
64	{
65	public:
66	HuffdicDecompressor() = delete;
67	HuffdicDecompressor(const HuffdicDecompressor &) = delete;
68	HuffdicDecompressor(const QVector<QByteArray> &huffData);
69	QByteArray decompress(const QByteArray &data) override;
70
71	private:
72	bool unpack(std::vector<char> &buf, BitReader reader, int depth) const;
73	const QVector<QByteArray> dicts;
74	quint32 entry_bits;
75	quint32 dict1[`256`];
76	quint32 dict2[`64`];
77	};
78
79	QByteArray RLEDecompressor::decompress(const QByteArray &data)
80	{
81	QByteArray ret;
82	ret.reserve(asize: `8192`);
83
84	int i = `0`;
85	int maxIndex = data.size() - `1`;
86
87	while (i < data.size()) {
88	unsigned char token = data.at(i: i++);
89	switch (TOKEN_CODE[token]) {
90	case `0`:
91	ret.append(c: token);
92	break;
93	case `1`:
94	if ((i + token > maxIndex + `1`)) {
95	return ret;
96	}
97	ret.append(a: data.mid(index: i, len: token));
98	i += token;
99	break;
100	case `2`:
101	ret.append(c: `' '`);
102	ret.append(c: token ^ `0x80`);
103	break;
104	case `3`:
105	{
106	if (i > maxIndex) {
107	return ret;
108	}
109	quint16 N = token << `8`;
110	N += (unsigned char)data.at(i: i++);
111	quint16 copyLength = (N & `7`) + `3`;
112	quint16 shift = (N & `0x3fff`) / `8`;
113	if ((shift < `1`) \|\| (shift > ret.size())) {
114	return ret;
115	}
116	auto shifted = ret.size() - shift;
117	for (auto j = shifted; j < shifted + copyLength; j++) {
118	ret.append(c: ret.at(i: j));
119	}
120	}
121	break;
122	}
123	}
124	return ret;
125	}
126
127	HuffdicDecompressor::HuffdicDecompressor(const QVector<QByteArray> &huffData)
128	: dicts(huffData.mid(pos: `1`))
129	{
130	if (dicts.empty())
131	return;
132
133	if ((dicts[`0`].size() < `18`) \|\| !dicts[`0`].startsWith(bv: "CDIC"))
134	return;
135
136	const QByteArray &huff1 = huffData[`0`];
137	if ((huff1.size() < `24`) \|\| !huff1.startsWith(bv: "HUFF"))
138	return;
139
140	quint32 off1 = qFromBigEndian<quint32>(src: huff1.constData() + `16`);
141	quint32 off2 = qFromBigEndian<quint32>(src: huff1.constData() + `20`);
142	if (((off1 + `256` * `4`) > huff1.size()) \|\| ((off2 + `64` * `4`) > huff1.size()))
143	return;
144
145	memcpy(dest: dict1, src: huff1.data() + off1, n: `256` * `4`);
146	memcpy(dest: dict2, src: huff1.data() + off2, n: `64` * `4`);
147
148	entry_bits = qFromBigEndian<quint32>(src: dicts[`0`].constData() + `12`);
149	if (entry_bits > `32`)
150	return;
151
152	valid = true;
153	}
154
155	QByteArray HuffdicDecompressor::decompress(const QByteArray &data)
156	{
157	std::vector<char> buf;
158	buf.reserve(n: `4096`);
159	if (!unpack(buf, reader: BitReader (data), depth: `0`)) {
160	valid = false;
161	}
162	return QByteArray(buf.data(), buf.size());
163	}
164
165	bool HuffdicDecompressor::unpack(std::vector<char> &buf, BitReader reader, int depth) const
166	{
167	// These two checks are fairly arbitrary, due to lack of an actual specification
168	// Both exceed typical real world files by far, but are useful to protect against
169	// 'ZIP bomb' style attacks
170	if (depth > `32`) {
171	return false;
172	} else if (buf.size() > `16` * `1024` * `1024`) {
173	return false;
174	}
175
176	auto dict_count = dicts.size();
177	quint32 entry_mask = (quint64(`1`) << entry_bits) - `1`;
178
179	while (reader.left()) {
180	quint32 dw = reader.read();
181	quint32 v = dict1[dw >> `24`];
182	quint8 codelen = v & `0x1F`;
183	if (!codelen)
184	return false;
185	quint32 code = dw >> (`32` - codelen);
186	quint32 r = (v >> `8`);
187	if (!(v & `0x80`)) {
188	while (code < dict2[(codelen - `1`) * `2`]) {
189	codelen++;
190	code = dw >> (`32` - codelen);
191	}
192	r = dict2[(codelen - `1`) * `2` + `1`];
193	}
194	r -= code;
195	if (!reader.eat(n: codelen))
196	return true;
197	quint32 dict_no = quint64(r) >> entry_bits;
198	if (dict_no >= dict_count) {
199	return false;
200	}
201	QByteArrayView dict = dicts.at(i: dict_no);
202	auto dict_size = dict.size();
203
204	quint32 off1 = `16` + (r & entry_mask) * `2`;
205	if (off1 > (dict_size - `2`)) {
206	return false;
207	}
208
209	quint16 off2 = `16` + qFromBigEndian<quint16>(src: dict.constData() + off1);
210	if (off2 > (dict_size - `2`)) {
211	return false;
212	}
213
214	quint16 blen = qFromBigEndian<quint16>(src: dict.constData() + off2);
215	if ((blen & `0x7fff`) > (dict_size - `2` - off2)) {
216	return false;
217	}
218
219	auto slice = dict.mid(pos: off2 + `2`, n: (blen & `0x7fff`));
220	if (blen & `0x8000`) {
221	buf.insert(position: buf.end(), first: slice.begin(), last: slice.end());
222	} else {
223	if (!unpack(buf, reader: BitReader (slice), depth: depth + `1`)) {
224	return false;
225	}
226	}
227	}
228	return true;
229	}
230
231	std::unique_ptr<Decompressor> Decompressor::create(quint8 type, const QVector<QByteArray> &auxData)
232	{
233	switch (type) {
234	case `1`:
235	return std::make_unique<NOOPDecompressor>();
236	case `2`:
237	return std::make_unique<RLEDecompressor>();
238	case `'H'`:
239	return std::make_unique<HuffdicDecompressor>(args: auxData);
240	default:
241	return nullptr;
242	}
243	}
244	}
245

source code of kdegraphics-mobipocket/lib/decompressor.cpp