1// Copyright (C) 2019 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#include <QtXml/qtxmlglobal.h>
6
7#if QT_CONFIG(dom)
8
9#include "qdomhelpers_p.h"
10#include "qdom_p.h"
11#include "qxmlstream.h"
12#include "private/qxmlstream_p.h"
13
14#include <memory>
15#include <stack>
16
17QT_BEGIN_NAMESPACE
18
19using namespace Qt::StringLiterals;
20
21/**************************************************************
22 *
23 * QDomBuilder
24 *
25 **************************************************************/
26
27QDomBuilder::QDomBuilder(QDomDocumentPrivate *d, QXmlStreamReader *r,
28 QDomDocument::ParseOptions options)
29 : doc(d), node(d), reader(r), parseOptions(options)
30{
31 Q_ASSERT(doc);
32 Q_ASSERT(reader);
33}
34
35QDomBuilder::~QDomBuilder() {}
36
37bool QDomBuilder::endDocument()
38{
39 // ### is this really necessary? (rms)
40 if (node != doc)
41 return false;
42 return true;
43}
44
45bool QDomBuilder::startDTD(const QString &name, const QString &publicId, const QString &systemId)
46{
47 doc->doctype()->name = name;
48 doc->doctype()->publicId = publicId;
49 doc->doctype()->systemId = systemId;
50 return true;
51}
52
53QString QDomBuilder::dtdInternalSubset(const QString &dtd)
54{
55 // https://www.w3.org/TR/xml/#NT-intSubset
56 // doctypedecl: '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
57 const QString &name = doc->doctype()->name;
58 QStringView tmp = QStringView(dtd).sliced(pos: dtd.indexOf(s: name) + name.size());
59
60 const QString &publicId = doc->doctype()->publicId;
61 if (!publicId.isEmpty())
62 tmp = tmp.sliced(pos: tmp.indexOf(s: publicId) + publicId.size());
63
64 const QString &systemId = doc->doctype()->systemId;
65 if (!systemId.isEmpty())
66 tmp = tmp.sliced(pos: tmp.indexOf(s: systemId) + systemId.size());
67
68 const qsizetype obra = tmp.indexOf(c: u'[');
69 const qsizetype cbra = tmp.lastIndexOf(c: u']');
70 if (obra >= 0 && cbra >= 0)
71 return tmp.left(n: cbra).sliced(pos: obra + 1).toString();
72
73 return QString();
74}
75
76bool QDomBuilder::parseDTD(const QString &dtd)
77{
78 doc->doctype()->internalSubset = dtdInternalSubset(dtd);
79 return true;
80}
81
82bool QDomBuilder::startElement(const QString &nsURI, const QString &qName,
83 const QXmlStreamAttributes &atts)
84{
85 const bool nsProcessing =
86 parseOptions.testFlag(flag: QDomDocument::ParseOption::UseNamespaceProcessing);
87 QDomNodePrivate *n =
88 nsProcessing ? doc->createElementNS(nsURI, qName) : doc->createElement(tagName: qName);
89 if (!n)
90 return false;
91
92 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
93
94 node->appendChild(newChild: n);
95 node = n;
96
97 // attributes
98 for (const auto &attr : atts) {
99 auto domElement = static_cast<QDomElementPrivate *>(node);
100 if (nsProcessing) {
101 domElement->setAttributeNS(nsURI: attr.namespaceUri().toString(),
102 qName: attr.qualifiedName().toString(),
103 newValue: attr.value().toString());
104 } else {
105 domElement->setAttribute(name: attr.qualifiedName().toString(),
106 value: attr.value().toString());
107 }
108 }
109
110 return true;
111}
112
113bool QDomBuilder::endElement()
114{
115 if (!node || node == doc)
116 return false;
117 node = node->parent();
118
119 return true;
120}
121
122bool QDomBuilder::characters(const QString &characters, bool cdata)
123{
124 // No text as child of some document
125 if (node == doc)
126 return false;
127
128 std::unique_ptr<QDomNodePrivate> n;
129 if (cdata) {
130 n.reset(p: doc->createCDATASection(data: characters));
131 } else if (!entityName.isEmpty()) {
132 auto e = std::make_unique<QDomEntityPrivate>(
133 args&: doc, args: nullptr, args&: entityName, args: QString(), args: QString(), args: QString());
134 e->value = characters;
135 e->ref.deref();
136 doc->doctype()->appendChild(newChild: e.get());
137 Q_UNUSED(e.release());
138 n.reset(p: doc->createEntityReference(name: entityName));
139 } else {
140 n.reset(p: doc->createTextNode(data: characters));
141 }
142 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
143 node->appendChild(newChild: n.get());
144 Q_UNUSED(n.release());
145
146 return true;
147}
148
149bool QDomBuilder::processingInstruction(const QString &target, const QString &data)
150{
151 QDomNodePrivate *n;
152 n = doc->createProcessingInstruction(target, data);
153 if (n) {
154 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
155 node->appendChild(newChild: n);
156 return true;
157 } else
158 return false;
159}
160
161bool QDomBuilder::skippedEntity(const QString &name)
162{
163 QDomNodePrivate *n = doc->createEntityReference(name);
164 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
165 node->appendChild(newChild: n);
166 return true;
167}
168
169void QDomBuilder::fatalError(const QString &message)
170{
171 parseResult.errorMessage = message;
172 parseResult.errorLine = reader->lineNumber();
173 parseResult.errorColumn = reader->columnNumber();
174}
175
176bool QDomBuilder::startEntity(const QString &name)
177{
178 entityName = name;
179 return true;
180}
181
182bool QDomBuilder::endEntity()
183{
184 entityName.clear();
185 return true;
186}
187
188bool QDomBuilder::comment(const QString &characters)
189{
190 QDomNodePrivate *n;
191 n = doc->createComment(data: characters);
192 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
193 node->appendChild(newChild: n);
194 return true;
195}
196
197bool QDomBuilder::unparsedEntityDecl(const QString &name, const QString &publicId,
198 const QString &systemId, const QString &notationName)
199{
200 QDomEntityPrivate *e =
201 new QDomEntityPrivate(doc, nullptr, name, publicId, systemId, notationName);
202 // keep the refcount balanced: appendChild() does a ref anyway.
203 e->ref.deref();
204 doc->doctype()->appendChild(newChild: e);
205 return true;
206}
207
208bool QDomBuilder::externalEntityDecl(const QString &name, const QString &publicId,
209 const QString &systemId)
210{
211 return unparsedEntityDecl(name, publicId, systemId, notationName: QString());
212}
213
214bool QDomBuilder::notationDecl(const QString &name, const QString &publicId,
215 const QString &systemId)
216{
217 QDomNotationPrivate *n = new QDomNotationPrivate(doc, nullptr, name, publicId, systemId);
218 // keep the refcount balanced: appendChild() does a ref anyway.
219 n->ref.deref();
220 doc->doctype()->appendChild(newChild: n);
221 return true;
222}
223
224/**************************************************************
225 *
226 * QDomParser
227 *
228 **************************************************************/
229
230QDomParser::QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r,
231 QDomDocument::ParseOptions options)
232 : reader(r), domBuilder(d, r, options)
233{
234}
235
236bool QDomParser::parse()
237{
238 return parseProlog() && parseBody();
239}
240
241bool QDomParser::parseProlog()
242{
243 Q_ASSERT(reader);
244
245 bool foundDtd = false;
246
247 while (!reader->atEnd()) {
248 reader->readNext();
249
250 if (reader->hasError()) {
251 domBuilder.fatalError(message: reader->errorString());
252 return false;
253 }
254
255 switch (reader->tokenType()) {
256 case QXmlStreamReader::StartDocument:
257 if (!reader->documentVersion().isEmpty()) {
258 QString value(u"version='"_s);
259 value += reader->documentVersion();
260 value += u'\'';
261 if (!reader->documentEncoding().isEmpty()) {
262 value += u" encoding='"_s;
263 value += reader->documentEncoding();
264 value += u'\'';
265 }
266 if (reader->isStandaloneDocument()) {
267 value += u" standalone='yes'"_s;
268 } else {
269 // Add the standalone attribute only if it was specified
270 if (reader->hasStandaloneDeclaration())
271 value += u" standalone='no'"_s;
272 }
273
274 if (!domBuilder.processingInstruction(target: u"xml"_s, data: value)) {
275 domBuilder.fatalError(
276 message: QDomParser::tr(sourceText: "Error occurred while processing XML declaration"));
277 return false;
278 }
279 }
280 break;
281 case QXmlStreamReader::DTD:
282 if (foundDtd) {
283 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Multiple DTD sections are not allowed"));
284 return false;
285 }
286 foundDtd = true;
287
288 if (!domBuilder.startDTD(name: reader->dtdName().toString(),
289 publicId: reader->dtdPublicId().toString(),
290 systemId: reader->dtdSystemId().toString())) {
291 domBuilder.fatalError(
292 message: QDomParser::tr(sourceText: "Error occurred while processing document type declaration"));
293 return false;
294 }
295 if (!domBuilder.parseDTD(dtd: reader->text().toString()))
296 return false;
297 if (!parseMarkupDecl())
298 return false;
299 break;
300 case QXmlStreamReader::Comment:
301 if (!domBuilder.comment(characters: reader->text().toString())) {
302 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Error occurred while processing comment"));
303 return false;
304 }
305 break;
306 case QXmlStreamReader::ProcessingInstruction:
307 if (!domBuilder.processingInstruction(target: reader->processingInstructionTarget().toString(),
308 data: reader->processingInstructionData().toString())) {
309 domBuilder.fatalError(
310 message: QDomParser::tr(sourceText: "Error occurred while processing a processing instruction"));
311 return false;
312 }
313 break;
314 default:
315 // If the token is none of the above, prolog processing is done.
316 return true;
317 }
318 }
319
320 return true;
321}
322
323bool QDomParser::parseBody()
324{
325 Q_ASSERT(reader);
326
327 std::stack<QString> tagStack;
328 while (!reader->atEnd() && !reader->hasError()) {
329 switch (reader->tokenType()) {
330 case QXmlStreamReader::StartElement:
331 tagStack.push(x: reader->qualifiedName().toString());
332 if (!domBuilder.startElement(nsURI: reader->namespaceUri().toString(),
333 qName: reader->qualifiedName().toString(),
334 atts: reader->attributes())) {
335 domBuilder.fatalError(
336 message: QDomParser::tr(sourceText: "Error occurred while processing a start element"));
337 return false;
338 }
339 break;
340 case QXmlStreamReader::EndElement:
341 if (tagStack.empty() || reader->qualifiedName() != tagStack.top()) {
342 domBuilder.fatalError(
343 message: QDomParser::tr(sourceText: "Unexpected end element '%1'").arg(a: reader->name()));
344 return false;
345 }
346 tagStack.pop();
347 if (!domBuilder.endElement()) {
348 domBuilder.fatalError(
349 message: QDomParser::tr(sourceText: "Error occurred while processing an end element"));
350 return false;
351 }
352 break;
353 case QXmlStreamReader::Characters:
354 // Skip the content if it contains only spacing characters,
355 // unless it's CDATA or PreserveSpacingOnlyNodes was specified.
356 if (reader->isCDATA() || domBuilder.preserveSpacingOnlyNodes()
357 || !(reader->isWhitespace() || reader->text().trimmed().isEmpty())) {
358 if (!domBuilder.characters(characters: reader->text().toString(), cdata: reader->isCDATA())) {
359 domBuilder.fatalError(
360 message: QDomParser::tr(sourceText: "Error occurred while processing the element content"));
361 return false;
362 }
363 }
364 break;
365 case QXmlStreamReader::Comment:
366 if (!domBuilder.comment(characters: reader->text().toString())) {
367 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Error occurred while processing comments"));
368 return false;
369 }
370 break;
371 case QXmlStreamReader::ProcessingInstruction:
372 if (!domBuilder.processingInstruction(target: reader->processingInstructionTarget().toString(),
373 data: reader->processingInstructionData().toString())) {
374 domBuilder.fatalError(
375 message: QDomParser::tr(sourceText: "Error occurred while processing a processing instruction"));
376 return false;
377 }
378 break;
379 case QXmlStreamReader::EntityReference:
380 if (!domBuilder.skippedEntity(name: reader->name().toString())) {
381 domBuilder.fatalError(
382 message: QDomParser::tr(sourceText: "Error occurred while processing an entity reference"));
383 return false;
384 }
385 break;
386 default:
387 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Unexpected token"));
388 return false;
389 }
390
391 reader->readNext();
392 }
393
394 if (reader->hasError()) {
395 domBuilder.fatalError(message: reader->errorString());
396 reader->readNext();
397 return false;
398 }
399
400 if (!tagStack.empty()) {
401 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Tag mismatch"));
402 return false;
403 }
404
405 return true;
406}
407
408bool QDomParser::parseMarkupDecl()
409{
410 Q_ASSERT(reader);
411
412 const auto entities = reader->entityDeclarations();
413 for (const auto &entityDecl : entities) {
414 // Entity declarations are created only for External Entities. Internal Entities
415 // are parsed, and QXmlStreamReader handles the parsing itself and returns the
416 // parsed result. So we don't need to do anything for the Internal Entities.
417 if (!entityDecl.publicId().isEmpty() || !entityDecl.systemId().isEmpty()) {
418 // External Entity
419 if (!domBuilder.unparsedEntityDecl(name: entityDecl.name().toString(),
420 publicId: entityDecl.publicId().toString(),
421 systemId: entityDecl.systemId().toString(),
422 notationName: entityDecl.notationName().toString())) {
423 domBuilder.fatalError(
424 message: QDomParser::tr(sourceText: "Error occurred while processing entity declaration"));
425 return false;
426 }
427 }
428 }
429
430 const auto notations = reader->notationDeclarations();
431 for (const auto &notationDecl : notations) {
432 if (!domBuilder.notationDecl(name: notationDecl.name().toString(),
433 publicId: notationDecl.publicId().toString(),
434 systemId: notationDecl.systemId().toString())) {
435 domBuilder.fatalError(
436 message: QDomParser::tr(sourceText: "Error occurred while processing notation declaration"));
437 return false;
438 }
439 }
440
441 return true;
442}
443
444QT_END_NAMESPACE
445
446#endif // feature dom
447

// source code of qtbase/src/xml/dom/qdomhelpers.cpp