1/*
2 SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7#include "datetimeparser_p.h"
8#include "fb2extractor.h"
9#include "kfilemetadata_debug.h"
10
11#include <QDateTime>
12#include <QFile>
13#include <QXmlStreamReader>
14
15#include <KZip>
16
17#include <memory>
18
19using namespace KFileMetaData;
20
21Fb2Extractor::Fb2Extractor(QObject *parent)
22 : ExtractorPlugin(parent)
23{
24}
25
26namespace
27{
28static const QString regularMimeType()
29{
30 return QStringLiteral("application/x-fictionbook+xml");
31}
32
33static const QString compressedMimeType()
34{
35 return QStringLiteral("application/x-zip-compressed-fb2");
36}
37
38static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()};
39
40}
41
42QStringList Fb2Extractor::mimetypes() const
43{
44 return supportedMimeTypes;
45}
46
47void Fb2Extractor::extract(ExtractionResult *result)
48{
49 std::unique_ptr<QIODevice> device;
50 std::unique_ptr<KZip> zip;
51
52 if (result->inputMimetype() == regularMimeType()) {
53 device.reset(p: new QFile(result->inputUrl()));
54 if (!device->open(mode: QIODevice::ReadOnly | QIODevice::Text)) {
55 return;
56 }
57
58 } else if (result->inputMimetype() == compressedMimeType()) {
59 zip.reset(p: new KZip(result->inputUrl()));
60 if (!zip->open(mode: QIODevice::ReadOnly)) {
61 qCDebug(KFILEMETADATA_LOG) << "Failed to open" << zip->fileName() << "-" << zip->errorString();
62 return;
63 }
64
65 const auto entries = zip->directory()->entries();
66 if (entries.count() != 1) {
67 return;
68 }
69
70 const QString entryPath = entries.first();
71 if (!entryPath.endsWith(s: QLatin1String(".fb2"))) {
72 return;
73 }
74
75 const auto *entry = zip->directory()->file(name: entryPath);
76 if (!entry) {
77 return;
78 }
79
80 device.reset(p: entry->createDevice());
81 }
82
83 result->addType(type: Type::Document);
84
85 QXmlStreamReader xml(device.get());
86
87 bool inFictionBook = false;
88 bool inDescription = false;
89 bool inTitleInfo = false;
90 bool inAuthor = false;
91 bool inDocumentInfo = false;
92 bool inPublishInfo = false;
93 bool inBody = false;
94
95 QString authorFirstName;
96 QString authorMiddleName;
97 QString authorLastName;
98 QString authorNickName;
99
100 while (!xml.atEnd() && !xml.hasError()) {
101 xml.readNext();
102
103 if (xml.name() == QLatin1String("FictionBook")) {
104 if (xml.isStartElement()) {
105 inFictionBook = true;
106 } else if (xml.isEndElement()) {
107 break;
108 }
109 } else if (xml.name() == QLatin1String("description")) {
110 if (xml.isStartElement()) {
111 inDescription = true;
112 } else if (xml.isEndElement()) {
113 inDescription = false;
114 }
115 } else if (xml.name() == QLatin1String("title-info")) {
116 if (xml.isStartElement()) {
117 inTitleInfo = true;
118 } else if (xml.isEndElement()) {
119 inTitleInfo = false;
120 }
121 } else if (xml.name() == QLatin1String("document-info")) {
122 if (xml.isStartElement()) {
123 inDocumentInfo = true;
124 } else if (xml.isEndElement()) {
125 inDocumentInfo = false;
126 }
127 } else if (xml.name() == QLatin1String("publish-info")) {
128 if (xml.isStartElement()) {
129 inPublishInfo = true;
130 } else if (xml.isEndElement()) {
131 inPublishInfo = false;
132 }
133 } else if (xml.name() == QLatin1String("body")) {
134 if (xml.isStartElement()) {
135 inBody = true;
136 } else if (xml.isEndElement()) {
137 inBody = false;
138 }
139 }
140
141 if (!inFictionBook) {
142 continue;
143 }
144
145 if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) {
146 if (inTitleInfo) {
147 if (xml.isStartElement()) {
148 if (xml.name() == QLatin1String("author")) {
149 inAuthor = true;
150 } else if (inAuthor) {
151 if (xml.name() == QLatin1String("first-name")) {
152 authorFirstName = xml.readElementText();
153 } else if (xml.name() == QLatin1String("middle-name")) {
154 authorMiddleName = xml.readElementText();
155 } else if (xml.name() == QLatin1String("last-name")) {
156 authorLastName = xml.readElementText();
157 } else if (xml.name() == QLatin1String("nickname")) {
158 authorNickName = xml.readElementText();
159 }
160 } else if (xml.name() == QLatin1String("book-title")) {
161 result->add(property: Property::Title, value: xml.readElementText());
162 } else if (xml.name() == QLatin1String("annotation")) {
163 result->add(property: Property::Description, value: xml.readElementText(behaviour: QXmlStreamReader::IncludeChildElements).trimmed());
164 } else if (xml.name() == QLatin1String("lang")) {
165 result->add(property: Property::Language, value: xml.readElementText());
166 } else if (xml.name() == QLatin1String("genre")) {
167 result->add(property: Property::Genre, value: xml.readElementText());
168 }
169 } else if (xml.isEndElement()) {
170 inAuthor = false;
171
172 QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName};
173 nameParts.removeAll(t: QString());
174
175 if (!nameParts.isEmpty()) {
176 result->add(property: Property::Author, value: nameParts.join(sep: QLatin1Char(' ')));
177 } else if (!authorNickName.isEmpty()) {
178 result->add(property: Property::Author, value: authorNickName);
179 }
180
181 authorFirstName.clear();
182 authorMiddleName.clear();
183 authorLastName.clear();
184 authorNickName.clear();
185 }
186 } else if (inDocumentInfo) {
187 if (xml.name() == QLatin1String("date")) {
188 // Date can be "not exact" but date "value", if present, is an xs:date
189 const auto dateValue = xml.attributes().value(qualifiedName: QLatin1String("value"));
190 QDateTime dt = QDateTime::fromString(string: dateValue.toString());
191
192 if (!dt.isValid()) {
193 dt = Parser::dateTimeFromString(dateString: xml.readElementText());
194 }
195
196 if (dt.isValid()) {
197 result->add(property: Property::CreationDate, value: dt);
198 }
199 } else if (xml.name() == QLatin1String("program-used")) {
200 result->add(property: Property::Generator, value: xml.readElementText());
201 // "Owner of the fb2 document copyrights"
202 } else if (xml.name() == QLatin1String("publisher")) {
203 result->add(property: Property::Copyright, value: xml.readElementText());
204 }
205 } else if (inPublishInfo) {
206 if (xml.name() == QLatin1String("publisher")) {
207 result->add(property: Property::Publisher, value: xml.readElementText());
208 } else if (xml.name() == QLatin1String("year")) {
209 bool ok;
210 const int releaseYear = xml.readElementText().toInt(ok: &ok);
211 if (ok) {
212 result->add(property: Property::ReleaseYear, value: releaseYear);
213 }
214 }
215 }
216 } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) {
217 result->append(text: xml.text().toString());
218 }
219 }
220}
221
222#include "moc_fb2extractor.cpp"
223

// Source: kfilemetadata/src/extractors/fb2extractor.cpp