1 | /* |
2 | SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.1-or-later |
5 | */ |
6 | |
7 | #include "datetimeparser_p.h" |
8 | #include "fb2extractor.h" |
9 | #include "kfilemetadata_debug.h" |
10 | |
11 | #include <QDateTime> |
12 | #include <QFile> |
13 | #include <QXmlStreamReader> |
14 | |
15 | #include <KZip> |
16 | |
17 | #include <memory> |
18 | |
19 | using namespace KFileMetaData; |
20 | |
21 | Fb2Extractor::(QObject *parent) |
22 | : ExtractorPlugin(parent) |
23 | { |
24 | } |
25 | |
26 | namespace |
27 | { |
28 | static const QString regularMimeType() |
29 | { |
30 | return QStringLiteral("application/x-fictionbook+xml" ); |
31 | } |
32 | |
33 | static const QString compressedMimeType() |
34 | { |
35 | return QStringLiteral("application/x-zip-compressed-fb2" ); |
36 | } |
37 | |
38 | static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()}; |
39 | |
40 | } |
41 | |
42 | QStringList Fb2Extractor::() const |
43 | { |
44 | return supportedMimeTypes; |
45 | } |
46 | |
47 | void Fb2Extractor::(ExtractionResult *result) |
48 | { |
49 | std::unique_ptr<QIODevice> device; |
50 | std::unique_ptr<KZip> zip; |
51 | |
52 | if (result->inputMimetype() == regularMimeType()) { |
53 | device.reset(p: new QFile(result->inputUrl())); |
54 | if (!device->open(mode: QIODevice::ReadOnly | QIODevice::Text)) { |
55 | return; |
56 | } |
57 | |
58 | } else if (result->inputMimetype() == compressedMimeType()) { |
59 | zip.reset(p: new KZip(result->inputUrl())); |
60 | if (!zip->open(mode: QIODevice::ReadOnly)) { |
61 | qCDebug(KFILEMETADATA_LOG) << "Failed to open" << zip->fileName() << "-" << zip->errorString(); |
62 | return; |
63 | } |
64 | |
65 | const auto entries = zip->directory()->entries(); |
66 | if (entries.count() != 1) { |
67 | return; |
68 | } |
69 | |
70 | const QString entryPath = entries.first(); |
71 | if (!entryPath.endsWith(s: QLatin1String(".fb2" ))) { |
72 | return; |
73 | } |
74 | |
75 | const auto *entry = zip->directory()->file(name: entryPath); |
76 | if (!entry) { |
77 | return; |
78 | } |
79 | |
80 | device.reset(p: entry->createDevice()); |
81 | } |
82 | |
83 | result->addType(type: Type::Document); |
84 | |
85 | QXmlStreamReader xml(device.get()); |
86 | |
87 | bool inFictionBook = false; |
88 | bool inDescription = false; |
89 | bool inTitleInfo = false; |
90 | bool inAuthor = false; |
91 | bool inDocumentInfo = false; |
92 | bool inPublishInfo = false; |
93 | bool inBody = false; |
94 | |
95 | QString authorFirstName; |
96 | QString authorMiddleName; |
97 | QString authorLastName; |
98 | QString authorNickName; |
99 | |
100 | while (!xml.atEnd() && !xml.hasError()) { |
101 | xml.readNext(); |
102 | |
103 | if (xml.name() == QLatin1String("FictionBook" )) { |
104 | if (xml.isStartElement()) { |
105 | inFictionBook = true; |
106 | } else if (xml.isEndElement()) { |
107 | break; |
108 | } |
109 | } else if (xml.name() == QLatin1String("description" )) { |
110 | if (xml.isStartElement()) { |
111 | inDescription = true; |
112 | } else if (xml.isEndElement()) { |
113 | inDescription = false; |
114 | } |
115 | } else if (xml.name() == QLatin1String("title-info" )) { |
116 | if (xml.isStartElement()) { |
117 | inTitleInfo = true; |
118 | } else if (xml.isEndElement()) { |
119 | inTitleInfo = false; |
120 | } |
121 | } else if (xml.name() == QLatin1String("document-info" )) { |
122 | if (xml.isStartElement()) { |
123 | inDocumentInfo = true; |
124 | } else if (xml.isEndElement()) { |
125 | inDocumentInfo = false; |
126 | } |
127 | } else if (xml.name() == QLatin1String("publish-info" )) { |
128 | if (xml.isStartElement()) { |
129 | inPublishInfo = true; |
130 | } else if (xml.isEndElement()) { |
131 | inPublishInfo = false; |
132 | } |
133 | } else if (xml.name() == QLatin1String("body" )) { |
134 | if (xml.isStartElement()) { |
135 | inBody = true; |
136 | } else if (xml.isEndElement()) { |
137 | inBody = false; |
138 | } |
139 | } |
140 | |
141 | if (!inFictionBook) { |
142 | continue; |
143 | } |
144 | |
145 | if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) { |
146 | if (inTitleInfo) { |
147 | if (xml.isStartElement()) { |
148 | if (xml.name() == QLatin1String("author" )) { |
149 | inAuthor = true; |
150 | } else if (inAuthor) { |
151 | if (xml.name() == QLatin1String("first-name" )) { |
152 | authorFirstName = xml.readElementText(); |
153 | } else if (xml.name() == QLatin1String("middle-name" )) { |
154 | authorMiddleName = xml.readElementText(); |
155 | } else if (xml.name() == QLatin1String("last-name" )) { |
156 | authorLastName = xml.readElementText(); |
157 | } else if (xml.name() == QLatin1String("nickname" )) { |
158 | authorNickName = xml.readElementText(); |
159 | } |
160 | } else if (xml.name() == QLatin1String("book-title" )) { |
161 | result->add(property: Property::Title, value: xml.readElementText()); |
162 | } else if (xml.name() == QLatin1String("annotation" )) { |
163 | result->add(property: Property::Description, value: xml.readElementText(behaviour: QXmlStreamReader::IncludeChildElements).trimmed()); |
164 | } else if (xml.name() == QLatin1String("lang" )) { |
165 | result->add(property: Property::Language, value: xml.readElementText()); |
166 | } else if (xml.name() == QLatin1String("genre" )) { |
167 | result->add(property: Property::Genre, value: xml.readElementText()); |
168 | } |
169 | } else if (xml.isEndElement()) { |
170 | inAuthor = false; |
171 | |
172 | QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName}; |
173 | nameParts.removeAll(t: QString()); |
174 | |
175 | if (!nameParts.isEmpty()) { |
176 | result->add(property: Property::Author, value: nameParts.join(sep: QLatin1Char(' '))); |
177 | } else if (!authorNickName.isEmpty()) { |
178 | result->add(property: Property::Author, value: authorNickName); |
179 | } |
180 | |
181 | authorFirstName.clear(); |
182 | authorMiddleName.clear(); |
183 | authorLastName.clear(); |
184 | authorNickName.clear(); |
185 | } |
186 | } else if (inDocumentInfo) { |
187 | if (xml.name() == QLatin1String("date" )) { |
188 | // Date can be "not exact" but date "value", if present, is an xs:date |
189 | const auto dateValue = xml.attributes().value(qualifiedName: QLatin1String("value" )); |
190 | QDateTime dt = QDateTime::fromString(string: dateValue.toString()); |
191 | |
192 | if (!dt.isValid()) { |
193 | dt = Parser::dateTimeFromString(dateString: xml.readElementText()); |
194 | } |
195 | |
196 | if (dt.isValid()) { |
197 | result->add(property: Property::CreationDate, value: dt); |
198 | } |
199 | } else if (xml.name() == QLatin1String("program-used" )) { |
200 | result->add(property: Property::Generator, value: xml.readElementText()); |
201 | // "Owner of the fb2 document copyrights" |
202 | } else if (xml.name() == QLatin1String("publisher" )) { |
203 | result->add(property: Property::Copyright, value: xml.readElementText()); |
204 | } |
205 | } else if (inPublishInfo) { |
206 | if (xml.name() == QLatin1String("publisher" )) { |
207 | result->add(property: Property::Publisher, value: xml.readElementText()); |
208 | } else if (xml.name() == QLatin1String("year" )) { |
209 | bool ok; |
210 | const int releaseYear = xml.readElementText().toInt(ok: &ok); |
211 | if (ok) { |
212 | result->add(property: Property::ReleaseYear, value: releaseYear); |
213 | } |
214 | } |
215 | } |
216 | } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) { |
217 | result->append(text: xml.text().toString()); |
218 | } |
219 | } |
220 | } |
221 | |
222 | #include "moc_fb2extractor.cpp" |
223 | |