1 | /* |
    SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3 | |
4 | SPDX-License-Identifier: LGPL-2.1-or-later |
5 | */ |
6 | |
7 | |
#include "xmlextractor.h"
#include "kfilemetadata_debug.h"
#include "dublincoreextractor.h"

#include <QDomDocument>
#include <QFile>
#include <QXmlStreamReader>

#ifdef SVG_XML_COMPRESSED_SUPPORT
#include <KCompressionDevice>
#endif

#include <memory>
19 | |
20 | namespace { |
21 | |
22 | //inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); } |
23 | inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg" ); } |
24 | inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#" ); } |
25 | inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#" ); } |
26 | |
27 | void (KFileMetaData::ExtractionResult* result, const QDomElement &node) |
28 | { |
29 | if (node.namespaceURI() != svgNS()) { |
30 | return; |
31 | } |
32 | |
33 | if ((node.localName() == QLatin1String("g" )) || |
34 | (node.localName() == QLatin1String("a" ))) { |
35 | QDomElement e = node.firstChildElement(); |
36 | for (; !e.isNull(); e = e.nextSiblingElement()) { |
37 | extractSvgText(result, node: e); |
38 | } |
39 | } else if (node.localName() == QLatin1String("text" )) { |
40 | qCDebug(KFILEMETADATA_LOG) << node.text(); |
41 | result->append(text: node.text()); |
42 | } |
43 | } |
44 | |
45 | static const QStringList supportedMimeTypes = { |
46 | QStringLiteral("application/xml" ), |
47 | QStringLiteral("image/svg+xml" ), |
48 | QStringLiteral("image/svg+xml-compressed" ), |
49 | QStringLiteral("image/svg" ), |
50 | }; |
51 | |
52 | } |
53 | |
54 | namespace KFileMetaData |
55 | { |
56 | |
57 | XmlExtractor::(QObject* parent) |
58 | : ExtractorPlugin(parent) |
59 | { |
60 | |
61 | } |
62 | |
63 | QStringList XmlExtractor::() const |
64 | { |
65 | return supportedMimeTypes; |
66 | } |
67 | |
68 | void XmlExtractor::(ExtractionResult* result) |
69 | { |
70 | auto flags = result->inputFlags(); |
71 | |
72 | QFile file(result->inputUrl()); |
73 | if (!file.open(flags: QIODevice::ReadOnly)) { |
74 | qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file" ; |
75 | return; |
76 | } |
77 | |
78 | |
79 | if ((result->inputMimetype() == QLatin1String("image/svg" )) || |
80 | (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) || |
81 | (result->inputMimetype() == QLatin1String("image/svg+xml" ))) { |
82 | |
83 | result->addType(type: Type::Image); |
84 | |
85 | QIODevice *ioDevice = &file; |
86 | #ifdef SVG_XML_COMPRESSED_SUPPORT |
87 | std::unique_ptr<KCompressionDevice> gzReader; |
88 | if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) { |
89 | gzReader.reset(p: new KCompressionDevice(&file, false, KCompressionDevice::CompressionType::GZip)); |
90 | if (!gzReader->open(mode: QIODevice::ReadOnly)) { |
91 | qCDebug(KFILEMETADATA_LOG) << "Failed to open" << result->inputUrl() << "-" << gzReader->errorString(); |
92 | return; |
93 | } |
94 | ioDevice = gzReader.get(); |
95 | } |
96 | #else |
97 | if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) { |
98 | return; |
99 | } |
100 | #endif |
101 | |
102 | QDomDocument doc; |
103 | doc.setContent(device: ioDevice, options: QDomDocument::ParseOption::UseNamespaceProcessing); |
104 | QDomElement svg = doc.firstChildElement(); |
105 | |
106 | if (!svg.isNull() |
107 | && svg.localName() == QLatin1String("svg" ) |
108 | && svg.namespaceURI() == svgNS()) { |
109 | |
110 | QDomElement e = svg.firstChildElement(); |
111 | for (; !e.isNull(); e = e.nextSiblingElement()) { |
112 | if (e.namespaceURI() != svgNS()) { |
113 | continue; |
114 | } |
115 | |
116 | if (e.localName() == QLatin1String("metadata" )) { |
117 | if (!(flags & ExtractionResult::ExtractMetaData)) { |
118 | continue; |
119 | } |
120 | |
121 | auto rdf = e.firstChildElement(tagName: QLatin1String("RDF" )); |
122 | if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) { |
123 | continue; |
124 | } |
125 | |
126 | auto cc = rdf.firstChildElement(tagName: QLatin1String("Work" )); |
127 | if (cc.isNull() || cc.namespaceURI() != ccNS()) { |
128 | continue; |
129 | } |
130 | |
131 | DublinCoreExtractor::extract(result, fragment: cc); |
132 | |
133 | } else if (e.localName() == QLatin1String("defs" )) { |
134 | // skip |
135 | continue; |
136 | } else if (flags & ExtractionResult::ExtractPlainText) { |
137 | // extract |
138 | extractSvgText(result, node: e); |
139 | } |
140 | } |
141 | } |
142 | } else { |
143 | result->addType(type: Type::Text); |
144 | |
145 | if (flags & ExtractionResult::ExtractPlainText) { |
146 | QXmlStreamReader stream(&file); |
147 | while (!stream.atEnd()) { |
148 | QXmlStreamReader::TokenType token = stream.readNext(); |
149 | |
150 | if (token == QXmlStreamReader::Characters) { |
151 | QString text = stream.text().trimmed().toString(); |
152 | if (!text.isEmpty()) { |
153 | result->append(text); |
154 | } |
155 | } |
156 | } |
157 | } |
158 | } |
159 | } |
160 | |
161 | } // namespace KFileMetaData |
162 | |
163 | #include "moc_xmlextractor.cpp" |
164 | |