1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the test suite of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
21 | ** included in the packaging of this file. Please review the following |
22 | ** information to ensure the GNU General Public License requirements will |
23 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
24 | ** |
25 | ** $QT_END_LICENSE$ |
26 | ** |
27 | ****************************************************************************/ |
28 | |
29 | #include <QCoreApplication> |
30 | #include <QDateTime> |
31 | #include <QIODevice> |
32 | #include <QList> |
33 | #include <QPair> |
34 | #include <QStack> |
35 | #include <QtDebug> |
36 | |
37 | #include "XMLWriter.h" |
38 | |
39 | /* Issues: |
40 | * - Switch to Qt's d-pointer semantics, if in Qt. |
41 | * - Remove namespace(PatternistSDK), and change name, if in Qt. |
42 | * - Is it really necessary to pass the tag name to endElement()? |
43 | * - Could it be of interest to let the user control the encoding? Are those cases common |
44 | * enough to justify support in Qt? Using anything but UTF-8 or UTF-16 |
45 | * means asking for trouble, from an interoperability perspective. |
46 | */ |
47 | |
/* Design rationale, comments:
 *
 * - The class is called XMLWriter to harvest familiarity by being consistent with
 *   Java's XMLWriter class. If XMLWriter is moved to Qt, the name QXmlWriter is perhaps suitable.
 * - The class does not handle indentation because the "do one thing well" principle is
 *   in use. XMLWriter should be fast and not assume a certain idea of indentation. Indentation
 *   should be implemented in a standalone QXmlContentHandler that performs the indentation and
 *   "has a" QXmlContentHandler which it additionally calls, thereby proxying/piping to another
 *   QXmlContentHandler (which most likely is an XMLWriter). This achieves a modularized,
 *   flexible approach to indentation. Another reason is that indentation is very subjective.
 *   The indenter class should probably be called XMLIndenter/QXmlIndenter.
 * - It could be of interest to implement QXmlDTDHandler such that it would be possible to serialize
 *   DTDs. Must be done before BC becomes significant.
 * - I think the most valuable part of this class is its Q_ASSERT tests. Many programmers have severe
 *   problems producing XML, and the tests help them catch their mistakes. They therefore promote
 *   interoperability. Do not remove them. If any are wrong, fix them instead.
 */
65 | |
66 | using namespace QPatternistSDK; |
67 | |
68 | /** |
69 | * A namespace binding, prefix/namespace URI. |
70 | */ |
71 | typedef QPair<QString, QString> NSBinding; |
72 | typedef QList<NSBinding> NSBindingList; |
73 | |
74 | class XMLWriter::Private |
75 | { |
76 | public: |
77 | inline Private(QIODevice *devP) : insideCDATA(false), |
78 | addModificationNote(false), |
79 | dev(devP) |
80 | { |
81 | hasContentStack.push(t: true); |
82 | } |
83 | |
84 | #ifdef QT_NO_DEBUG |
85 | inline void validateQName(const QString &) const |
86 | { |
87 | } |
88 | |
89 | inline void verifyNS(const QString &) const |
90 | { |
91 | } |
92 | #else |
93 | /** |
94 | * Simple test of that @p name is an acceptable QName. |
95 | */ |
96 | inline void validateQName(const QString &name) |
97 | { |
98 | Q_ASSERT_X(!name.isEmpty(), Q_FUNC_INFO, |
99 | "An XML name cannot be empty." ); |
100 | Q_ASSERT_X(!name.endsWith(QLatin1Char(':')), Q_FUNC_INFO, |
101 | "An XML name cannot end with a colon(QLatin1Char(':'))." ); |
102 | Q_ASSERT_X(!name.contains(QRegExp(QLatin1String("[ \t\n]" ))), Q_FUNC_INFO, |
103 | "An XML name cannot contain whitespace." ); |
104 | } |
105 | |
106 | /** |
107 | * Ensures that the prefix of @p qName is declared. |
108 | */ |
109 | inline void verifyNS(const QString &qName) const |
110 | { |
111 | const QString prefix(qName.left(n: qName.indexOf(c: QLatin1Char(':')))); |
112 | |
113 | if(qName.contains(c: QLatin1Char(':')) && prefix != QLatin1String("xml" )) |
114 | { |
115 | bool foundPrefix = false; |
116 | const QStack<NSBindingList>::const_iterator end(namespaceTracker.constEnd()); |
117 | QStack<NSBindingList>::const_iterator it(namespaceTracker.constBegin()); |
118 | |
119 | for(; it != end; ++it) |
120 | { |
121 | const NSBindingList::const_iterator lend((*it).constEnd()); |
122 | NSBindingList::const_iterator lit((*it).constBegin()); |
123 | |
124 | for(; lit != lend; ++it) |
125 | { |
126 | if((*lit).first == prefix) |
127 | { |
128 | foundPrefix = true; |
129 | break; |
130 | } |
131 | } |
132 | if(foundPrefix) |
133 | break; |
134 | } |
135 | |
136 | Q_ASSERT_X(foundPrefix, "XMLWriter::startElement()" , |
137 | qPrintable(QString::fromLatin1("The prefix %1 is not declared. All prefixes " |
138 | "except 'xml' must be declared." ).arg(prefix))); |
139 | } |
140 | } |
141 | #endif |
142 | |
143 | inline QString escapeElementContent(const QString &ch) |
144 | { |
145 | const int l = ch.length(); |
146 | QString retval; |
147 | |
148 | for(int i = 0; i != l; ++i) |
149 | { |
150 | const QChar c(ch.at(i)); |
151 | |
152 | if(c == QLatin1Char(QLatin1Char('&'))) |
153 | retval += QLatin1String("&" ); |
154 | else if(c == QLatin1Char(QLatin1Char('<'))) |
155 | retval += QLatin1String("<" ); |
156 | else |
157 | retval += c; |
158 | } |
159 | |
160 | return retval; |
161 | } |
162 | |
163 | inline QString escapeAttributeContent(const QString &ch) |
164 | { |
165 | const int l = ch.length(); |
166 | QString retval; |
167 | |
168 | for(int i = 0; i != l; ++i) |
169 | { |
170 | const QChar c(ch.at(i)); |
171 | |
172 | /* We don't have to escape '\'' because we use '\"' as attribute delimiter. */ |
173 | if(c == QLatin1Char('&')) |
174 | retval += QLatin1String("&" ); |
175 | else if(c == QLatin1Char('<')) |
176 | retval += QLatin1String("<" ); |
177 | else if(c == QLatin1Char('"')) |
178 | retval += QLatin1String(""" ); |
179 | else |
180 | retval += c; |
181 | } |
182 | |
183 | return retval; |
184 | } |
185 | |
186 | inline QString escapeCDATAContent(const QString &ch) |
187 | { |
188 | const int l = ch.length(); |
189 | QString retval; |
190 | qint8 atEnd = 0; |
191 | |
192 | for(int i = 0; i != l; ++i) |
193 | { |
194 | const QChar c(ch.at(i)); |
195 | |
196 | /* Escape '>' if in "]]>" */ |
197 | if(c == QLatin1Char(']')) |
198 | { |
199 | if(atEnd == 0 || atEnd == 1) |
200 | ++atEnd; |
201 | else |
202 | atEnd = 0; |
203 | |
204 | retval += QLatin1Char(']'); |
205 | } |
206 | else if(c == QLatin1Char('>')) |
207 | { |
208 | if(atEnd == 2) |
209 | retval += QLatin1String(">" ); |
210 | else |
211 | { |
212 | atEnd = 0; |
213 | retval += QLatin1Char('>'); |
214 | } |
215 | } |
216 | else |
217 | retval += c; |
218 | } |
219 | |
220 | return retval; |
221 | } |
222 | |
223 | /** |
224 | * We wrap dev in this function such that we can deploy the Q_ASSERT_X |
225 | * macro in each place it's used. |
226 | */ |
227 | inline QIODevice *device() const |
228 | { |
229 | Q_ASSERT_X(dev, Q_FUNC_INFO, |
230 | "No device specified for XMLWriter; one must be specified with " |
231 | "setDevice() or via the constructor before XMLWriter can be used." ); |
232 | return dev; |
233 | } |
234 | |
235 | /** |
236 | * @returns true on success, otherwise false |
237 | */ |
238 | inline bool serialize(const QString &data) |
239 | { |
240 | const QByteArray utf8(data.toUtf8()); |
241 | |
242 | return device()->write(data: utf8) == utf8.size(); |
243 | } |
244 | |
245 | /** |
246 | * @returns true on success, otherwise false |
247 | */ |
248 | inline bool serialize(const char data) |
249 | { |
250 | return device()->putChar(c: data); |
251 | } |
252 | |
253 | /** |
254 | * @returns true on success, otherwise false |
255 | */ |
256 | inline bool serialize(const char *data) |
257 | { |
258 | return device()->write(data) == qstrlen(str: data); |
259 | } |
260 | |
261 | inline bool hasElementContent() const |
262 | { |
263 | return hasContentStack.top(); |
264 | } |
265 | |
266 | inline void handleElement() |
267 | { |
268 | if(!hasElementContent()) |
269 | serialize(data: '>'); |
270 | |
271 | /* This element is content for the parent. */ |
272 | hasContentStack.top() = true; |
273 | } |
274 | |
275 | NSBindingList namespaces; |
276 | bool insideCDATA; |
277 | bool addModificationNote; |
278 | QString msg; |
279 | QIODevice *dev; |
280 | QStack<bool> hasContentStack; |
281 | QString errorString; |
282 | QStack<QString> tags; |
283 | QStack<NSBindingList> namespaceTracker; |
284 | }; |
285 | |
/**
 * Reduces complexity: writes @p string via the private implementation and, if
 * that fails, stores the device's error string and makes the function in
 * which the macro is used return false.
 *
 * The body is wrapped in do/while(false) such that the expansion forms one
 * statement which needs a terminating semicolon, and hence can be used
 * unbraced in either branch of an if/else.
 */
#define serialize(string) do                                    \
    {                                                           \
        if(!d->serialize(string))                               \
        {                                                       \
            d->errorString = d->device()->errorString();        \
            return false;                                       \
        }                                                       \
    }                                                           \
    while (false)
296 | |
297 | XMLWriter::XMLWriter(QIODevice *outStream) : d(new Private(outStream)) |
298 | { |
299 | } |
300 | |
301 | XMLWriter::~XMLWriter() |
302 | { |
303 | delete d; |
304 | } |
305 | |
306 | bool XMLWriter::startDocument() |
307 | { |
308 | if(!device()->isOpen() && !device()->open(mode: QIODevice::WriteOnly)) |
309 | return false; |
310 | |
311 | if(d->addModificationNote) |
312 | { |
313 | if(d->msg.isNull()) |
314 | { |
315 | d->msg = QString::fromLatin1(str: "NOTE: This file was automatically generated " |
316 | "by %1 at %2. All changes to this file will be lost." ) |
317 | .arg(args: QCoreApplication::instance()->applicationName(), |
318 | args: QDateTime::currentDateTime().toString()); |
319 | } |
320 | if(!comment(ch: d->msg)) |
321 | return false; |
322 | |
323 | serialize('\n'); |
324 | } |
325 | |
326 | serialize(QLatin1String("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" )); |
327 | |
328 | return true; |
329 | } |
330 | |
331 | bool XMLWriter::startElement(const QString &qName, const QXmlStreamAttributes &atts) |
332 | { |
333 | Q_ASSERT_X(!d->insideCDATA, Q_FUNC_INFO, |
334 | "Only characters() can be received when inside CDATA." ); |
335 | Q_ASSERT_X(!qName.startsWith(QLatin1String("xmlns" )), Q_FUNC_INFO, |
336 | "startElement should not be used for declaring prefixes, " |
337 | "use startPrefixMapping() for that." ); |
338 | |
339 | d->validateQName(name: qName); |
340 | d->verifyNS(qName); |
341 | |
342 | d->handleElement(); |
343 | |
344 | serialize('<'); |
345 | serialize(qName); |
346 | |
347 | d->tags.push(t: qName); |
348 | d->namespaceTracker.push(t: d->namespaces); |
349 | |
350 | /* Add namespace declarations. */ |
351 | const NSBindingList::const_iterator end(d->namespaces.constEnd()); |
352 | NSBindingList::const_iterator it(d->namespaces.constBegin()); |
353 | |
354 | for(; it != end; ++it) |
355 | { |
356 | if((*it).first.isEmpty()) |
357 | serialize(" xmlns=" ); |
358 | else |
359 | { |
360 | serialize(" xmlns:" ); |
361 | serialize((*it).first); |
362 | serialize('='); |
363 | } |
364 | |
365 | serialize('"'); |
366 | serialize(d->escapeElementContent((*it).second)); |
367 | serialize('"'); |
368 | } |
369 | d->namespaces.clear(); |
370 | |
371 | for (const auto &attr : atts) { |
372 | const auto qName = attr.qualifiedName().toString(); |
373 | |
374 | d->validateQName(name: qName); |
375 | d->verifyNS(qName); |
376 | |
377 | serialize(' '); |
378 | serialize(qName); |
379 | serialize("=\"" ); |
380 | serialize(d->escapeAttributeContent(attr.value().toString())); |
381 | serialize('"'); |
382 | } |
383 | |
384 | d->hasContentStack.push(t: false); |
385 | return true; |
386 | } |
387 | |
388 | bool XMLWriter::endElement(const QString &qName) |
389 | { |
390 | Q_ASSERT_X(!d->insideCDATA, Q_FUNC_INFO, |
391 | "Only characters() can be received when inside CDATA." ); |
392 | Q_ASSERT_X(d->tags.pop() == qName, Q_FUNC_INFO, |
393 | "The element tags are not balanced, the produced XML is invalid." ); |
394 | |
395 | d->namespaceTracker.pop(); |
396 | |
397 | /* "this" element is content for our parent, so ensure hasElementContent is true. */ |
398 | |
399 | if(d->hasElementContent()) |
400 | { |
401 | serialize(QLatin1String("</" )); |
402 | serialize(qName); |
403 | serialize('>'); |
404 | } |
405 | else |
406 | serialize(QLatin1String("/>" )); |
407 | |
408 | d->hasContentStack.pop(); |
409 | |
410 | return true; |
411 | } |
412 | |
413 | bool XMLWriter::startPrefixMapping(const QString &prefix, const QString &uri) |
414 | { |
415 | Q_ASSERT_X(!d->insideCDATA, Q_FUNC_INFO, |
416 | "Only characters() can be received when inside CDATA." ); |
417 | Q_ASSERT_X(prefix.toLower() != QLatin1String("xml" ) || |
418 | (prefix.toLower() == QLatin1String("xml" ) && |
419 | (uri == QLatin1String("http://www.w3.org/TR/REC-xml-names/" ) || |
420 | uri.isEmpty())), |
421 | Q_FUNC_INFO, |
422 | "The prefix 'xml' can only be bound to the namespace " |
423 | "\"http://www.w3.org/TR/REC-xml-names/\"." ); |
424 | Q_ASSERT_X(prefix.toLower() != QLatin1String("xml" ) && |
425 | uri != QLatin1String("http://www.w3.org/TR/REC-xml-names/" ), |
426 | Q_FUNC_INFO, |
427 | "The namespace \"http://www.w3.org/TR/REC-xml-names/\" can only be bound to the " |
428 | "\"xml\" prefix." ); |
429 | |
430 | d->namespaces.append(t: qMakePair(x: prefix, y: uri)); |
431 | return true; |
432 | } |
433 | |
434 | bool XMLWriter::processingInstruction(const QString &target, |
435 | const QString &data) |
436 | { |
437 | Q_ASSERT_X(target.toLower() != QLatin1String("xml" ), Q_FUNC_INFO, |
438 | "A processing instruction cannot have the name xml in any " |
439 | "capitalization, because it is reserved." ); |
440 | Q_ASSERT_X(!data.contains(QLatin1String("?>" )), Q_FUNC_INFO, |
441 | "The content of a processing instruction cannot contain the string \"?>\"." ); |
442 | Q_ASSERT_X(!d->insideCDATA, "XMLWriter::processingInstruction()" , |
443 | "Only characters() can be received when inside CDATA." ); |
444 | |
445 | d->handleElement(); |
446 | |
447 | serialize(QLatin1String("<?" )); |
448 | serialize(target); |
449 | serialize(' '); |
450 | serialize(data); |
451 | serialize(QLatin1String("?>" )); |
452 | return true; |
453 | } |
454 | |
455 | bool XMLWriter::characters(const QString &ch) |
456 | { |
457 | Q_ASSERT_X(d->tags.count() >= 1, Q_FUNC_INFO, |
458 | "Text nodes can only appear inside elements(no elements sent)." ); |
459 | d->handleElement(); |
460 | |
461 | if(d->insideCDATA) |
462 | serialize(d->escapeCDATAContent(ch)); |
463 | else |
464 | serialize(d->escapeElementContent(ch)); |
465 | |
466 | return true; |
467 | } |
468 | |
469 | bool XMLWriter::(const QString &ch) |
470 | { |
471 | Q_ASSERT_X(!d->insideCDATA, Q_FUNC_INFO, |
472 | "Only characters() can be received when inside CDATA." ); |
473 | Q_ASSERT_X(!ch.contains(QLatin1String("--" )), Q_FUNC_INFO, |
474 | "XML comments may not contain double-hyphens(\"--\")." ); |
475 | Q_ASSERT_X(!ch.endsWith(QLatin1Char('-')), Q_FUNC_INFO, |
476 | "XML comments cannot end with a hyphen, \"-\"(add a space, for example)." ); |
477 | /* A comment starting with "<!---" is ok. */ |
478 | |
479 | d->handleElement(); |
480 | |
481 | serialize(QLatin1String("<!--" )); |
482 | serialize(ch); |
483 | serialize(QLatin1String("-->" )); |
484 | |
485 | return true; |
486 | } |
487 | |
488 | bool XMLWriter::startCDATA() |
489 | { |
490 | Q_ASSERT_X(d->insideCDATA, Q_FUNC_INFO, |
491 | "startCDATA() has already been called." ); |
492 | Q_ASSERT_X(d->tags.count() >= 1, Q_FUNC_INFO, |
493 | "CDATA sections can only appear inside elements(no elements sent)." ); |
494 | d->insideCDATA = true; |
495 | serialize(QLatin1String("<![CDATA[" )); |
496 | return true; |
497 | } |
498 | |
499 | bool XMLWriter::endCDATA() |
500 | { |
501 | d->insideCDATA = false; |
502 | serialize("]]>" ); |
503 | return true; |
504 | } |
505 | |
506 | bool XMLWriter::startDTD(const QString &name, |
507 | const QString &publicId, |
508 | const QString &systemId) |
509 | { |
510 | Q_ASSERT_X(!d->insideCDATA, Q_FUNC_INFO, |
511 | "Only characters() can be received when inside CDATA." ); |
512 | Q_ASSERT_X(!name.isEmpty(), Q_FUNC_INFO, |
513 | "The DOCTYPE name cannot be empty." ); |
514 | Q_ASSERT_X(d->tags.isEmpty() && d->namespaces.isEmpty(), Q_FUNC_INFO, |
515 | "No content such as namespace declarations or elements can be serialized " |
516 | "before the DOCTYPE declaration, the XML is invalid." ); |
517 | Q_ASSERT_X(!publicId.contains(QLatin1Char('"')), Q_FUNC_INFO, |
518 | "The PUBLIC ID cannot contain quotes('\"')." ); |
519 | Q_ASSERT_X(!systemId.contains(QLatin1Char('"')), Q_FUNC_INFO, |
520 | "The SYSTEM ID cannot contain quotes('\"')." ); |
521 | |
522 | serialize(QLatin1String("<!DOCTYPE " )); |
523 | serialize(name); |
524 | |
525 | if(!publicId.isEmpty()) |
526 | { |
527 | Q_ASSERT_X(!systemId.isEmpty(), Q_FUNC_INFO, |
528 | "When a public identifier is specified, a system identifier " |
529 | "must also be specified in order to produce valid XML." ); |
530 | serialize(" PUBLIC \"" ); |
531 | serialize(publicId); |
532 | serialize('"'); |
533 | } |
534 | |
535 | if(!systemId.isEmpty()) |
536 | { |
537 | if (publicId.isEmpty()) { |
538 | serialize(" SYSTEM" ); |
539 | } |
540 | |
541 | serialize(" \"" ); |
542 | serialize(systemId); |
543 | serialize('"'); |
544 | } |
545 | |
546 | return true; |
547 | } |
548 | |
549 | bool XMLWriter::endDTD() |
550 | { |
551 | Q_ASSERT_X(d->tags.isEmpty() && d->namespaces.isEmpty(), Q_FUNC_INFO, |
552 | "Content such as namespace declarations or elements cannot occur inside " |
553 | "the DOCTYPE declaration, the XML is invalid." ); |
554 | serialize(QLatin1String(">\n" )); |
555 | return true; |
556 | } |
557 | |
558 | bool XMLWriter::startEntity(const QString &) |
559 | { |
560 | return true; |
561 | } |
562 | |
563 | bool XMLWriter::endEntity(const QString &) |
564 | { |
565 | return true; |
566 | } |
567 | |
568 | void XMLWriter::setMessage(const QString &msg) |
569 | { |
570 | d->msg = msg; |
571 | } |
572 | |
573 | QString XMLWriter::modificationMessage() const |
574 | { |
575 | return d->msg; |
576 | } |
577 | |
578 | bool XMLWriter::endDocument() |
579 | { |
580 | Q_ASSERT_X(d->tags.isEmpty(), Q_FUNC_INFO, |
581 | "endDocument() called before all elements were closed with endElement()." ); |
582 | d->device()->close(); |
583 | return true; |
584 | } |
585 | |
586 | QString XMLWriter::errorString() const |
587 | { |
588 | return d->errorString; |
589 | } |
590 | |
591 | bool XMLWriter::ignorableWhitespace(const QString &ch) |
592 | { |
593 | return characters(ch); |
594 | } |
595 | |
596 | QIODevice *XMLWriter::device() const |
597 | { |
598 | return d->dev; |
599 | } |
600 | |
601 | void XMLWriter::setDevice(QIODevice *dev) |
602 | { |
603 | d->dev = dev; |
604 | } |
605 | |
606 | void XMLWriter::setAddMessage(const bool toggle) |
607 | { |
608 | d->addModificationNote = toggle; |
609 | } |
610 | |
611 | bool XMLWriter::addModificationMessage() const |
612 | { |
613 | return d->addModificationNote; |
614 | } |
615 | |
616 | #undef serialize |
617 | // vim: et:ts=4:sw=4:sts=4 |
618 | |
619 | |