1/*
2 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "xmlextractor.h"
9#include "kfilemetadata_debug.h"
10#include "dublincoreextractor.h"
11
12#include <QDomDocument>
13#include <QFile>
14#include <QXmlStreamReader>
15
16#ifdef SVG_XML_COMPRESSED_SUPPORT
17#include <KCompressionDevice>
18#endif
19
20namespace {
21
22//inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
23inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
24inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
25inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
26
27void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
28{
29 if (node.namespaceURI() != svgNS()) {
30 return;
31 }
32
33 if ((node.localName() == QLatin1String("g")) ||
34 (node.localName() == QLatin1String("a"))) {
35 QDomElement e = node.firstChildElement();
36 for (; !e.isNull(); e = e.nextSiblingElement()) {
37 extractSvgText(result, node: e);
38 }
39 } else if (node.localName() == QLatin1String("text")) {
40 qCDebug(KFILEMETADATA_LOG) << node.text();
41 result->append(text: node.text());
42 }
43}
44
45static const QStringList supportedMimeTypes = {
46 QStringLiteral("application/xml"),
47 QStringLiteral("image/svg+xml"),
48 QStringLiteral("image/svg+xml-compressed"),
49 QStringLiteral("image/svg"),
50};
51
52}
53
54namespace KFileMetaData
55{
56
57XmlExtractor::XmlExtractor(QObject* parent)
58 : ExtractorPlugin(parent)
59{
60
61}
62
63QStringList XmlExtractor::mimetypes() const
64{
65 return supportedMimeTypes;
66}
67
68void XmlExtractor::extract(ExtractionResult* result)
69{
70 auto flags = result->inputFlags();
71
72 QFile file(result->inputUrl());
73 if (!file.open(flags: QIODevice::ReadOnly)) {
74 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
75 return;
76 }
77
78
79 if ((result->inputMimetype() == QLatin1String("image/svg")) ||
80 (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) ||
81 (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
82
83 result->addType(type: Type::Image);
84
85 QIODevice *ioDevice = &file;
86#ifdef SVG_XML_COMPRESSED_SUPPORT
87 std::unique_ptr<KCompressionDevice> gzReader;
88 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
89 gzReader.reset(p: new KCompressionDevice(&file, false, KCompressionDevice::CompressionType::GZip));
90 if (!gzReader->open(mode: QIODevice::ReadOnly)) {
91 qCDebug(KFILEMETADATA_LOG) << "Failed to open" << result->inputUrl() << "-" << gzReader->errorString();
92 return;
93 }
94 ioDevice = gzReader.get();
95 }
96#else
97 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
98 return;
99 }
100#endif
101
102 QDomDocument doc;
103 doc.setContent(device: ioDevice, options: QDomDocument::ParseOption::UseNamespaceProcessing);
104 QDomElement svg = doc.firstChildElement();
105
106 if (!svg.isNull()
107 && svg.localName() == QLatin1String("svg")
108 && svg.namespaceURI() == svgNS()) {
109
110 QDomElement e = svg.firstChildElement();
111 for (; !e.isNull(); e = e.nextSiblingElement()) {
112 if (e.namespaceURI() != svgNS()) {
113 continue;
114 }
115
116 if (e.localName() == QLatin1String("metadata")) {
117 if (!(flags & ExtractionResult::ExtractMetaData)) {
118 continue;
119 }
120
121 auto rdf = e.firstChildElement(tagName: QLatin1String("RDF"));
122 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
123 continue;
124 }
125
126 auto cc = rdf.firstChildElement(tagName: QLatin1String("Work"));
127 if (cc.isNull() || cc.namespaceURI() != ccNS()) {
128 continue;
129 }
130
131 DublinCoreExtractor::extract(result, fragment: cc);
132
133 } else if (e.localName() == QLatin1String("defs")) {
134 // skip
135 continue;
136 } else if (flags & ExtractionResult::ExtractPlainText) {
137 // extract
138 extractSvgText(result, node: e);
139 }
140 }
141 }
142 } else {
143 result->addType(type: Type::Text);
144
145 if (flags & ExtractionResult::ExtractPlainText) {
146 QXmlStreamReader stream(&file);
147 while (!stream.atEnd()) {
148 QXmlStreamReader::TokenType token = stream.readNext();
149
150 if (token == QXmlStreamReader::Characters) {
151 QString text = stream.text().trimmed().toString();
152 if (!text.isEmpty()) {
153 result->append(text);
154 }
155 }
156 }
157 }
158 }
159}
160
161} // namespace KFileMetaData
162
163#include "moc_xmlextractor.cpp"
164

source code of kfilemetadata/src/extractors/xmlextractor.cpp