1 | /*************************************************************************** |
2 | * Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> * |
3 | * * |
4 | * RLE decompressor based on FBReader * |
5 | * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com> * |
6 | * * |
7 | * Huffdic decompressor based on Python code by Igor Skochinsky * |
8 | * * |
9 | * This program is free software; you can redistribute it and/or modify * |
10 | * it under the terms of the GNU General Public License as published by * |
11 | * the Free Software Foundation; either version 2 of the License, or * |
12 | * (at your option) any later version. * |
13 | ***************************************************************************/ |
14 | |
15 | #include "decompressor.h" |
16 | #include "mobipocket.h" |
17 | |
18 | #include <QList> |
19 | |
// Classification of every possible input byte for RLEDecompressor::decompress():
//   0 - the byte is copied to the output as-is
//   1 - the byte is a count; that many following bytes are copied verbatim
//   2 - the byte expands to a space followed by (byte ^ 0x80)
//   3 - the byte is the first half of a two-byte back-reference
static const unsigned char TOKEN_CODE[256] = {
    // 0x00: literal, 0x01-0x08: counted literal run, 0x09-0x0f: literal
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    // 0x10-0x7f: literal
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x80-0xbf: two-byte back-reference
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xc0-0xff: space + character
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
38 | |
39 | namespace Mobipocket { |
40 | |
41 | class NOOPDecompressor : public Decompressor |
42 | { |
43 | public: |
44 | NOOPDecompressor(const PDB& p) : Decompressor(p) {} |
45 | QByteArray decompress(const QByteArray& data) override { return data; } |
46 | }; |
47 | |
48 | |
49 | class RLEDecompressor : public Decompressor |
50 | { |
51 | public: |
52 | RLEDecompressor(const PDB& p) : Decompressor(p) {} |
53 | QByteArray decompress(const QByteArray& data) override; |
54 | }; |
55 | |
56 | class BitReader |
57 | { |
58 | public: |
59 | BitReader(const QByteArray& d) : pos(0), data(d) |
60 | { |
61 | data.append(s: "\000\000\000\000" ); //krazy:exclude=strings |
62 | len=data.size()*8; |
63 | } |
64 | |
65 | quint32 read() { |
66 | quint32 g=0; |
67 | quint64 r=0; |
68 | while (g<32) { |
69 | r=(r << 8) | (quint8)data[(pos+g)>>3]; |
70 | g=g+8 - ((pos+g) & 7); |
71 | } |
72 | return (r >> (g-32)); |
73 | } |
74 | bool eat(int n) { |
75 | pos+=n; |
76 | return pos <= len; |
77 | } |
78 | |
79 | int left() { |
80 | return len - pos; |
81 | } |
82 | |
83 | private: |
84 | int pos; |
85 | int len; |
86 | QByteArray data; |
87 | }; |
88 | |
89 | class HuffdicDecompressor : public Decompressor |
90 | { |
91 | public: |
92 | HuffdicDecompressor(const PDB& p); |
93 | QByteArray decompress(const QByteArray& data) override; |
94 | private: |
95 | void unpack(BitReader reader, int depth = 0); |
96 | QList<QByteArray> dicts; |
97 | quint32 entry_bits; |
98 | quint32 dict1[256]; |
99 | quint32 dict2[64]; |
100 | |
101 | QByteArray buf; |
102 | }; |
103 | |
104 | |
105 | |
106 | QByteArray RLEDecompressor::decompress(const QByteArray& data) |
107 | { |
108 | QByteArray ret; |
109 | ret.reserve(asize: 8192); |
110 | |
111 | unsigned char token; |
112 | unsigned short copyLength, N, shift; |
113 | unsigned short shifted; |
114 | int i=0; |
115 | int maxIndex=data.size()-1; |
116 | |
117 | while (i<data.size()) { |
118 | token = data.at(i: i++); |
119 | switch (TOKEN_CODE[token]) { |
120 | case 0: |
121 | ret.append(c: token); |
122 | break; |
123 | case 1: |
124 | if ((i + token > maxIndex) ) { |
125 | goto endOfLoop; |
126 | } |
127 | ret.append(a: data.mid(index: i,len: token)); |
128 | i+=token; |
129 | break; |
130 | case 2: |
131 | ret.append(c: ' '); |
132 | ret.append(c: token ^ 0x80); |
133 | break; |
134 | case 3: |
135 | if (i + 1 > maxIndex) { |
136 | goto endOfLoop; |
137 | } |
138 | N = token; |
139 | N<<=8; |
140 | N+=(unsigned char)data.at(i: i++); |
141 | copyLength = (N & 7) + 3; |
142 | shift = (N & 0x3fff) / 8; |
143 | shifted = ret.size()-shift; |
144 | if (shifted>(ret.size()-1)) goto endOfLoop; |
145 | for (int i=0;i<copyLength;i++) ret.append(c: ret.at(i: shifted+i)); |
146 | break; |
147 | } |
148 | } |
149 | endOfLoop: |
150 | return ret; |
151 | |
152 | } |
153 | |
154 | quint32 readBELong(const QByteArray& data, int offset) |
155 | { |
156 | quint32 ret=0; |
157 | for (int i=0;i<4;i++) { ret<<=8; ret+=(unsigned char)data[offset+i]; } |
158 | return ret; |
159 | } |
160 | |
161 | HuffdicDecompressor::HuffdicDecompressor(const PDB& p) : Decompressor(p) |
162 | { |
163 | QByteArray =p.getRecord(i: 0); |
164 | quint32 huff_ofs=readBELong(data: header,offset: 0x70); |
165 | quint32 huff_num=readBELong(data: header,offset: 0x74); |
166 | quint32 off1,off2; |
167 | |
168 | QByteArray huff1=p.getRecord(i: huff_ofs); |
169 | if (huff1.isNull()) goto fail; |
170 | for (unsigned int i=1;i<huff_num;i++) { |
171 | QByteArray h=p.getRecord(i: huff_ofs+i); |
172 | if (h.isNull()) goto fail; |
173 | dicts.append(t: h); |
174 | } |
175 | |
176 | off1=readBELong(data: huff1,offset: 16); |
177 | off2=readBELong(data: huff1,offset: 20); |
178 | |
179 | if (!huff1.startsWith(bv: "HUFF" )) goto fail; //krazy:exclude=strings |
180 | if (!dicts[0].startsWith(bv: "CDIC" )) goto fail; //krazy:exclude=strings |
181 | |
182 | entry_bits=readBELong(data: dicts[0],offset: 12); |
183 | |
184 | memcpy(dest: dict1,src: huff1.data()+off1, n: 256*4); |
185 | memcpy(dest: dict2,src: huff1.data()+off2, n: 64*4); |
186 | return; |
187 | fail: |
188 | valid=false; |
189 | } |
190 | |
191 | QByteArray HuffdicDecompressor::decompress(const QByteArray& data) |
192 | { |
193 | buf.clear(); |
194 | unpack(reader: BitReader(data)); |
195 | return buf; |
196 | } |
197 | |
198 | void HuffdicDecompressor::unpack(BitReader reader,int depth) |
199 | { |
200 | if (depth>32) goto fail; |
201 | while (reader.left()) { |
202 | quint32 dw=reader.read(); |
203 | quint32 v=dict1[dw>>24]; |
204 | quint8 codelen = v & 0x1F; |
205 | if (!codelen) goto fail; |
206 | quint32 code = dw >> (32 - codelen); |
207 | quint32 r=(v >> 8); |
208 | if (!( v & 0x80)) { |
209 | while (code < dict2[(codelen-1)*2]) { |
210 | codelen++; |
211 | code = dw >> (32 - codelen); |
212 | } |
213 | r = dict2[(codelen-1)*2+1]; |
214 | } |
215 | r-=code; |
216 | if (!codelen) goto fail; |
217 | if (!reader.eat(n: codelen)) return; |
218 | quint32 dict_no = r >> entry_bits; |
219 | quint32 off1 = 16 + (r - (dict_no << entry_bits))*2; |
220 | QByteArray dict=dicts[dict_no]; |
221 | quint32 off2 = 16 + (unsigned char)dict[off1]*256 + (unsigned char)dict[off1+1]; |
222 | quint32 blen = (unsigned char)dict[off2]*256 + (unsigned char)dict[off2+1]; |
223 | QByteArray slice=dict.mid(index: off2+2,len: (blen & 0x7fff)); |
224 | if (blen & 0x8000) buf+=slice; |
225 | else unpack(reader: BitReader(slice),depth: depth+1); |
226 | } |
227 | return; |
228 | fail: |
229 | valid=false; |
230 | } |
231 | |
232 | Decompressor* Decompressor::create(quint8 type, const PDB& pdb) |
233 | { |
234 | switch (type) { |
235 | case 1 : return new NOOPDecompressor(pdb); |
236 | case 2 : return new RLEDecompressor(pdb); |
237 | case 'H' : return new HuffdicDecompressor(pdb); |
238 | default : return nullptr; |
239 | } |
240 | |
241 | } |
242 | } |
243 | |