1/***************************************************************************
2 * Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> *
3 * *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
8 ***************************************************************************/
9
10#include "mobipocket.h"
11#include "decompressor.h"
12
13#include <QIODevice>
14#include <QtEndian>
15#include <QBuffer>
16#include <QTextCodec>
17#include <QImageReader>
18#include <QRegularExpression>
19
20namespace Mobipocket {
21
22QByteArray Stream::read(int len)
23{
24 QByteArray ret;
25 ret.resize(size: len);
26 len=read(buf: ret.data(),size: len);
27 if (len<0) len=0;
28 ret.resize(size: len);
29 return ret;
30}
31
32QByteArray Stream::readAll()
33{
34 QByteArray ret, bit;
35 while (!(bit=read(len: 4096)).isEmpty()) ret+=bit;
36 return ret;
37}
38
39
40
41struct PDBPrivate {
42 QList<quint32> recordOffsets;
43 Stream* device;
44 QString fileType;
45 quint16 nrecords;
46 bool valid;
47
48 void init();
49};
50
51void PDBPrivate::init()
52{
53 valid=true;
54 quint16 word;
55 quint32 dword;
56 if (!device->seek(pos: 0x3c)) goto fail;
57 fileType=QString::fromLatin1(ba: device->read(len: 8));
58
59 if (!device->seek(pos: 0x4c)) goto fail;
60 device->read(buf: (char*)&word,size: 2);
61 nrecords=qFromBigEndian(source: word);
62
63 for (int i=0;i<nrecords;i++) {
64 device->read(buf: (char*)&dword,size: 4);
65 recordOffsets.append(t: qFromBigEndian(source: dword));
66 device->read(buf: (char*)&dword,size: 4);
67 }
68 return;
69 fail:
70 valid=false;
71}
72
73PDB::PDB(Stream* dev) : d(new PDBPrivate)
74{
75 d->device=dev;
76 d->init();
77}
78
79PDB::~PDB()
80{
81 delete d;
82}
83
84QByteArray PDB::getRecord(int i) const
85{
86 if (i>=d->nrecords) return QByteArray();
87 quint32 offset=d->recordOffsets[i];
88 bool last=(i==(d->nrecords-1));
89 if (!d->device->seek(pos: offset)) return QByteArray();
90 if (last) return d->device->readAll();
91 return d->device->read(len: d->recordOffsets[i+1]-offset);
92}
93
94bool PDB::isValid() const
95{
96 return d->valid;
97}
98
99int PDB::recordCount() const
100{
101 return d->nrecords;
102}
103
104////////////////////////////////////////////
105struct DocumentPrivate
106{
107 DocumentPrivate(Stream* d) : pdb(d), valid(true), firstImageRecord(0),
108 drm(false), thumbnailIndex(0) {}
109 PDB pdb;
110 Decompressor* dec;
111 quint16 ntextrecords;
112 quint16 maxRecordSize;
113 bool valid;
114
115 // number of first record holding image. Usually it is directly after end of text, but not always
116 quint16 firstImageRecord;
117 QMap<Document::MetaKey, QString> metadata;
118 QTextCodec* codec;
119 bool drm;
120
121 // index of thumbnail in image list. May be specified in EXTH.
122 // If not then just use first image and hope for the best
123 int thumbnailIndex;
124
125 void init();
126 void findFirstImage();
127 void parseEXTH(const QByteArray& data);
128 void parseHtmlHead(const QString& data);
129 QString readEXTHRecord(const QByteArray& data, quint32& offset);
130 QImage getImageFromRecord(int recnum);
131};
132
133
134void DocumentPrivate::parseHtmlHead(const QString& data)
135{
136 static const QRegularExpression title(QLatin1String("<dc:title.*>(.*)</dc:title>"), QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
137 static const QRegularExpression author(QLatin1String("<dc:creator.*>(.*)</dc:creator>"), QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
138 static const QRegularExpression copyright(QLatin1String("<dc:rights.*>(.*)</dc:rights>"), QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
139 static const QRegularExpression subject(QLatin1String("<dc:subject.*>(.*)</dc:subject>"), QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
140 static const QRegularExpression description(QLatin1String("<dc:description.*>(.*)</dc:description>"), QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
141
142 // title could have been already taken from MOBI record
143 if (!metadata.contains(key: Document::Title)) {
144 if (const auto titleMatch = title.match(subject: data); titleMatch.hasMatch()) metadata[Document::Title]=titleMatch.captured(nth: 1);
145 }
146 if (const auto authorMatch = author.match(subject: data); authorMatch.hasMatch()) metadata[Document::Author]=authorMatch.captured(nth: 1);
147 if (const auto copyrightMatch = copyright.match(subject: data); copyrightMatch.hasMatch()) metadata[Document::Copyright]=copyrightMatch.captured(nth: 1);
148 if (const auto subjectMatch = subject.match(subject: data); subjectMatch.hasMatch()) metadata[Document::Subject]=subjectMatch.captured(nth: 1);
149 if (const auto descriptionMatch = description.match(subject: data); descriptionMatch.hasMatch()) metadata[Document::Description]=descriptionMatch.captured(nth: 1);
150
151}
152
153void DocumentPrivate::init()
154{
155 quint32 encoding=0;
156
157 valid=pdb.isValid();
158 if (!valid) return;
159 QByteArray mhead=pdb.getRecord(i: 0);
160 if (mhead.isNull() || mhead.size() <14 ) goto fail;
161 dec = Decompressor::create(type: mhead[1], pdb);
162 if ((int)mhead[12]!=0 || (int)mhead[13]!=0) drm=true;
163 if (!dec) goto fail;
164
165 ntextrecords=(unsigned char)mhead[8];
166 ntextrecords<<=8;
167 ntextrecords+=(unsigned char)mhead[9];
168 maxRecordSize=(unsigned char)mhead[10];
169 maxRecordSize<<=8;
170 maxRecordSize+=(unsigned char)mhead[11];
171 if (mhead.size() > 31 ) encoding=readBELong(data: mhead, offset: 28);
172 if (encoding==65001) codec=QTextCodec::codecForName(name: "UTF-8");
173 else codec=QTextCodec::codecForName(name: "CP1252");
174 if (mhead.size()>176) parseEXTH(data: mhead);
175
176 // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records
177 if (metadata.size()<2 && !drm) parseHtmlHead(data: codec->toUnicode(dec->decompress(data: pdb.getRecord(i: 1))));
178 return;
179fail:
180 valid=false;
181}
182
183void DocumentPrivate::findFirstImage() {
184 firstImageRecord=ntextrecords+1;
185 while (firstImageRecord<pdb.recordCount()) {
186 QByteArray rec=pdb.getRecord(i: firstImageRecord);
187 if (rec.isNull()) return;
188 QBuffer buf(&rec);
189 buf.open(openMode: QIODevice::ReadOnly);
190 QImageReader r(&buf);
191 if (r.canRead()) return;
192 firstImageRecord++;
193 }
194}
195
196QString DocumentPrivate::readEXTHRecord(const QByteArray& data, quint32& offset)
197{
198 quint32 len=readBELong(data,offset);
199 offset+=4;
200 len-=8;
201 QString ret=codec->toUnicode(data.mid(index: offset,len));
202 offset+=len;
203 return ret;
204}
205
206QImage DocumentPrivate::getImageFromRecord(int i)
207{
208 QByteArray rec=pdb.getRecord(i);
209 return (rec.isNull()) ? QImage() : QImage::fromData(data: rec);
210}
211
212
213void DocumentPrivate::parseEXTH(const QByteArray& data)
214{
215 // try to get name
216 if (data.size()>=92) {
217 qint32 nameoffset=readBELong(data,offset: 84);
218 qint32 namelen=readBELong(data,offset: 88);
219 if ( (nameoffset + namelen) < data.size() ) {
220 metadata[Document::Title]=codec->toUnicode(data.mid(index: nameoffset, len: namelen));
221 }
222 }
223
224 quint32 exthoffs=readBELong(data,offset: 20)+16;
225
226 if (data.mid(index: exthoffs,len: 4)!="EXTH") return;
227 quint32 records=readBELong(data,offset: exthoffs+8);
228 quint32 offset=exthoffs+12;
229 for (unsigned int i=0;i<records;i++) {
230 if (offset+4 > quint32(data.size())) break;
231 quint32 type=readBELong(data,offset);
232 offset+=4;
233 switch (type) {
234 case 100: metadata[Document::Author]=readEXTHRecord(data,offset); break;
235 case 103: metadata[Document::Description]=readEXTHRecord(data,offset); break;
236 case 105: metadata[Document::Subject]=readEXTHRecord(data,offset); break;
237 case 109: metadata[Document::Copyright]=readEXTHRecord(data,offset); break;
238 case 202: offset += 4; thumbnailIndex = readBELong(data,offset); offset+=4; break;
239 default: readEXTHRecord(data,offset);
240 }
241 }
242
243
244}
245
246Document::Document(Stream* dev) : d(new DocumentPrivate(dev))
247{
248 d->init();
249}
250
251Document::~Document()
252{
253 delete d;
254}
255
256
257QString Document::text(int size) const
258{
259 QByteArray whole;
260 for (int i=1;i<d->ntextrecords+1;i++) {
261 QByteArray decompressedRecord = d->dec->decompress(data: d->pdb.getRecord(i));
262 if (decompressedRecord.size() > d->maxRecordSize)
263 decompressedRecord.resize(size: d->maxRecordSize);
264 whole+=decompressedRecord;
265 if (!d->dec->isValid()) {
266 d->valid=false;
267 return QString();
268 }
269 if (size!=-1 && whole.size()>size) break;
270 }
271 return d->codec->toUnicode(whole);
272}
273
274int Document::imageCount() const
275{
276 //FIXME: don't count FLIS and FCIS records
277 return d->pdb.recordCount()-d->ntextrecords;
278}
279
280bool Document::isValid() const
281{
282 return d->valid;
283}
284
285QImage Document::getImage(int i) const
286{
287 if (!d->firstImageRecord) d->findFirstImage();
288 return d->getImageFromRecord(i: d->firstImageRecord+i);
289}
290
291QMap<Document::MetaKey,QString> Document::metadata() const
292{
293 return d->metadata;
294}
295
296bool Document::hasDRM() const
297{
298 return d->drm;
299}
300
301QImage Document::thumbnail() const
302{
303 if (!d->firstImageRecord) d->findFirstImage();
304 QImage img=d->getImageFromRecord(i: d->thumbnailIndex+d->firstImageRecord);
305 // does not work, try first image
306 if (img.isNull() && d->thumbnailIndex) {
307 d->thumbnailIndex=0;
308 img=d->getImageFromRecord(i: d->firstImageRecord);
309 }
310 return img;
311}
312
313}
314

source code of kdegraphics-mobipocket/lib/mobipocket.cpp