1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "qdynamiccontext_p.h" |
41 | #include "qpatternistlocale_p.h" |
42 | #include "qitem_p.h" |
43 | #include "qxmlquery_p.h" |
44 | #include "qxmlserializer_p.h" |
45 | #include "qxmlserializer.h" |
46 | |
47 | QT_BEGIN_NAMESPACE |
48 | |
49 | using namespace QPatternist; |
50 | |
51 | QXmlSerializerPrivate::QXmlSerializerPrivate(const QXmlQuery &query, |
52 | QIODevice *outputDevice) |
53 | : isPreviousAtomic(false), |
54 | state(QXmlSerializer::BeforeDocumentElement), |
55 | np(query.namePool().d), |
56 | device(outputDevice), |
57 | codec(QTextCodec::codecForMib(mib: 106)), /* UTF-8 */ |
58 | query(query) |
59 | { |
60 | hasClosedElement.reserve(asize: EstimatedTreeDepth); |
61 | namespaces.reserve(asize: EstimatedTreeDepth); |
62 | nameCache.reserve(asize: EstimatedNameCount); |
63 | |
64 | hasClosedElement.push(t: qMakePair(x: QXmlName(), y: true)); |
65 | |
66 | /* |
67 | We push the empty namespace such that first of all |
68 | namespaceBinding() won't assert on an empty QStack, |
69 | and such that the empty namespace is in-scope and |
70 | that the code doesn't attempt to declare it. |
71 | |
72 | We push the XML namespace. Although we won't receive |
73 | declarations for it, we may output attributes by that |
74 | name. |
75 | */ |
76 | QVector<QXmlName> defNss; |
77 | defNss.resize(asize: 2); |
78 | defNss[0] = QXmlName(StandardNamespaces::empty, |
79 | StandardLocalNames::empty, |
80 | StandardPrefixes::empty); |
81 | defNss[1] = QXmlName(StandardNamespaces::xml, |
82 | StandardLocalNames::empty, |
83 | StandardPrefixes::xml); |
84 | |
85 | namespaces.push(t: defNss); |
86 | |
87 | /* If we don't set this flag, QTextCodec will generate a BOM. */ |
88 | converterState.flags = QTextCodec::IgnoreHeader; |
89 | } |
90 | |
91 | /*! |
92 | \class QXmlSerializer |
93 | \brief The QXmlSerializer class is an implementation of QAbstractXmlReceiver for transforming XQuery output into unformatted XML. |
94 | |
95 | \reentrant |
96 | \since 4.4 |
97 | \ingroup xml-tools |
98 | \inmodule QtXmlPatterns |
99 | |
100 | QXmlSerializer translates an \l {XQuery Sequence} {XQuery sequence}, usually |
101 | the output of a QXmlQuery, into XML. Consider the example: |
102 | |
103 | \snippet code/src_xmlpatterns_api_qxmlserializer.cpp 0 |
104 | |
105 | First it constructs a \l {QXmlQuery} {query} that gets the |
106 | first paragraph from document \c index.html. Then it constructs |
107 | an instance of this class with the \l {QXmlQuery} {query} and |
108 | \l {QIODevice} {myOutputDevice}. Finally, it |
109 | \l {QXmlQuery::evaluateTo()} {evaluates} the |
110 | \l {QXmlQuery} {query}, producing an ordered sequence of calls |
111 | to the serializer's callback functions. The sequence of callbacks |
112 | transforms the query output to XML and writes it to |
113 | \l {QIODevice} {myOutputDevice}. |
114 | |
115 | QXmlSerializer will: |
116 | |
117 | \list |
118 | \li Declare namespaces when needed, |
119 | |
120 | \li Use appropriate escaping, when characters can't be |
121 | represented in the XML, |
122 | |
123 | \li Handle line endings appropriately, |
124 | |
125 | \li Report errors, when it can't serialize the content, e.g., |
126 | when asked to serialize an attribute that is a top-level node, |
127 | or when more than one top-level element is encountered. |
128 | |
129 | \endlist |
130 | |
131 | If an error occurs during serialization, the result is undefined |
132 | unless the serializer is driven through a call to |
133 | QXmlQuery::evaluateTo(). |
134 | |
135 | If the generated XML should be indented and formatted for reading, |
136 | use QXmlFormatter. |
137 | |
138 | \sa {http://www.w3.org/TR/xslt-xquery-serialization/}{XSLT 2.0 and XQuery 1.0 Serialization} |
139 | |
140 | \sa QXmlFormatter |
141 | */ |
142 | |
143 | /*! |
144 | Constructs a serializer that uses the name pool and message |
145 | handler in \a query, and writes the output to \a outputDevice. |
146 | |
147 | \a outputDevice must be a valid, non-null device that is open in |
148 | write mode, otherwise behavior is undefined. |
149 | |
150 | \a outputDevice must not be opened with QIODevice::Text because it |
151 | will cause the output to be incorrect. This class will ensure line |
152 | endings are serialized as according with the XML specification. |
153 | QXmlSerializer does not take ownership of \a outputDevice. |
154 | */ |
155 | QXmlSerializer::QXmlSerializer(const QXmlQuery &query, |
156 | QIODevice *outputDevice) : QAbstractXmlReceiver(new QXmlSerializerPrivate(query, outputDevice)) |
157 | { |
158 | if(!outputDevice) |
159 | { |
160 | qWarning(msg: "outputDevice cannot be null." ); |
161 | return; |
162 | } |
163 | |
164 | if(!outputDevice->isWritable()) |
165 | { |
166 | qWarning(msg: "outputDevice must be opened in write mode." ); |
167 | return; |
168 | } |
169 | } |
170 | |
171 | /*! |
172 | \internal |
173 | */ |
174 | QXmlSerializer::QXmlSerializer(QAbstractXmlReceiverPrivate *d) : QAbstractXmlReceiver(d) |
175 | { |
176 | } |
177 | |
178 | /*! |
179 | \internal |
180 | */ |
181 | bool QXmlSerializer::atDocumentRoot() const |
182 | { |
183 | Q_D(const QXmlSerializer); |
184 | return d->state == BeforeDocumentElement || |
185 | (d->state == InsideDocumentElement && d->hasClosedElement.size() == 1); |
186 | } |
187 | |
188 | /*! |
189 | \internal |
190 | */ |
191 | void QXmlSerializer::startContent() |
192 | { |
193 | Q_D(QXmlSerializer); |
194 | if (!d->hasClosedElement.top().second) { |
195 | d->write(c: '>'); |
196 | d->hasClosedElement.top().second = true; |
197 | } |
198 | } |
199 | |
200 | /*! |
201 | \internal |
202 | */ |
203 | void QXmlSerializer::writeEscaped(const QString &toEscape) |
204 | { |
205 | if(toEscape.isEmpty()) /* Early exit. */ |
206 | return; |
207 | |
208 | QString result; |
209 | result.reserve(asize: int(toEscape.length() * 1.1)); |
210 | const int length = toEscape.length(); |
211 | |
212 | for(int i = 0; i < length; ++i) |
213 | { |
214 | const QChar c(toEscape.at(i)); |
215 | |
216 | if(c == QLatin1Char('<')) |
217 | result += QLatin1String("<" ); |
218 | else if(c == QLatin1Char('>')) |
219 | result += QLatin1String(">" ); |
220 | else if(c == QLatin1Char('&')) |
221 | result += QLatin1String("&" ); |
222 | else |
223 | result += toEscape.at(i); |
224 | } |
225 | |
226 | write(content: result); |
227 | } |
228 | |
229 | /*! |
230 | \internal |
231 | */ |
232 | void QXmlSerializer::writeEscapedAttribute(const QString &toEscape) |
233 | { |
234 | if(toEscape.isEmpty()) /* Early exit. */ |
235 | return; |
236 | |
237 | QString result; |
238 | result.reserve(asize: int(toEscape.length() * 1.1)); |
239 | const int length = toEscape.length(); |
240 | |
241 | for(int i = 0; i < length; ++i) |
242 | { |
243 | const QChar c(toEscape.at(i)); |
244 | |
245 | if(c == QLatin1Char('<')) |
246 | result += QLatin1String("<" ); |
247 | else if(c == QLatin1Char('>')) |
248 | result += QLatin1String(">" ); |
249 | else if(c == QLatin1Char('&')) |
250 | result += QLatin1String("&" ); |
251 | else if(c == QLatin1Char('"')) |
252 | result += QLatin1String(""" ); |
253 | else |
254 | result += toEscape.at(i); |
255 | } |
256 | |
257 | write(content: result); |
258 | } |
259 | |
260 | /*! |
261 | \internal |
262 | */ |
263 | void QXmlSerializer::write(const QString &content) |
264 | { |
265 | Q_D(QXmlSerializer); |
266 | d->device->write(data: d->codec->fromUnicode(in: content.constData(), length: content.length(), state: &d->converterState)); |
267 | } |
268 | |
269 | /*! |
270 | \internal |
271 | */ |
272 | void QXmlSerializer::write(const QXmlName &name) |
273 | { |
274 | Q_D(QXmlSerializer); |
275 | const QByteArray &cell = d->nameCache[name.code()]; |
276 | |
277 | if(cell.isNull()) |
278 | { |
279 | QByteArray &mutableCell = d->nameCache[name.code()]; |
280 | |
281 | const QString content(d->np->toLexical(qName: name)); |
282 | mutableCell = d->codec->fromUnicode(in: content.constData(), |
283 | length: content.length(), |
284 | state: &d->converterState); |
285 | d->device->write(data: mutableCell); |
286 | } |
287 | else |
288 | d->device->write(data: cell); |
289 | } |
290 | |
291 | /*! |
292 | \internal |
293 | */ |
294 | void QXmlSerializer::write(const char *const chars) |
295 | { |
296 | Q_D(QXmlSerializer); |
297 | d->device->write(data: chars); |
298 | } |
299 | |
300 | /*! |
301 | \reimp |
302 | */ |
303 | void QXmlSerializer::startElement(const QXmlName &name) |
304 | { |
305 | Q_D(QXmlSerializer); |
306 | Q_ASSERT(d->device); |
307 | Q_ASSERT(d->device->isWritable()); |
308 | Q_ASSERT(d->codec); |
309 | Q_ASSERT(!name.isNull()); |
310 | |
311 | d->namespaces.push(t: QVector<QXmlName>()); |
312 | |
313 | if(atDocumentRoot()) |
314 | { |
315 | if(d->state == BeforeDocumentElement) |
316 | d->state = InsideDocumentElement; |
317 | else if(d->state != InsideDocumentElement) |
318 | { |
319 | d->query.d->staticContext()->error(message: QtXmlPatterns::tr( |
320 | sourceText: "Element %1 can't be serialized because it appears outside " |
321 | "the document element." ).arg(a: formatKeyword(np: d->np, name)), |
322 | errorCode: ReportContext::SENR0001, |
323 | reflection: d->query.d->expression().data()); |
324 | } |
325 | } |
326 | |
327 | startContent(); |
328 | d->write(c: '<'); |
329 | write(name); |
330 | |
331 | /* Ensure that the namespace URI used in the name gets outputted. */ |
332 | namespaceBinding(nb: name); |
333 | |
334 | d->hasClosedElement.push(t: qMakePair(x: name, y: false)); |
335 | d->isPreviousAtomic = false; |
336 | } |
337 | |
338 | /*! |
339 | \reimp |
340 | */ |
341 | void QXmlSerializer::endElement() |
342 | { |
343 | Q_D(QXmlSerializer); |
344 | const QPair<QXmlName, bool> e(d->hasClosedElement.pop()); |
345 | d->namespaces.pop(); |
346 | |
347 | if(e.second) |
348 | { |
349 | write(chars: "</" ); |
350 | write(name: e.first); |
351 | d->write(c: '>'); |
352 | } |
353 | else |
354 | write(chars: "/>" ); |
355 | |
356 | d->isPreviousAtomic = false; |
357 | } |
358 | |
359 | /*! |
360 | \reimp |
361 | */ |
362 | void QXmlSerializer::attribute(const QXmlName &name, |
363 | const QStringRef &value) |
364 | { |
365 | Q_D(QXmlSerializer); |
366 | Q_ASSERT(!name.isNull()); |
367 | |
368 | /* Ensure that the namespace URI used in the name gets outputted. */ |
369 | { |
370 | /* Since attributes doesn't pick up the default namespace, a |
371 | * namespace declaration would cause trouble if we output it. */ |
372 | if(name.prefix() != StandardPrefixes::empty) |
373 | namespaceBinding(nb: name); |
374 | } |
375 | |
376 | if(atDocumentRoot()) |
377 | { |
378 | Q_UNUSED(d); |
379 | d->query.d->staticContext()->error(message: QtXmlPatterns::tr( |
380 | sourceText: "Attribute %1 can't be serialized because it appears at " |
381 | "the top level." ).arg(a: formatKeyword(np: d->np, name)), |
382 | errorCode: ReportContext::SENR0001, |
383 | reflection: d->query.d->expression().data()); |
384 | } |
385 | else |
386 | { |
387 | d->write(c: ' '); |
388 | write(name); |
389 | write(chars: "=\"" ); |
390 | writeEscapedAttribute(toEscape: value.toString()); |
391 | d->write(c: '"'); |
392 | } |
393 | } |
394 | |
395 | /*! |
396 | \internal |
397 | */ |
398 | bool QXmlSerializer::isBindingInScope(const QXmlName nb) const |
399 | { |
400 | Q_D(const QXmlSerializer); |
401 | const int levelLen = d->namespaces.size(); |
402 | |
403 | if(nb.prefix() == StandardPrefixes::empty) |
404 | { |
405 | for(int lvl = levelLen - 1; lvl >= 0; --lvl) |
406 | { |
407 | const QVector<QXmlName> &scope = d->namespaces.at(i: lvl); |
408 | const int vectorLen = scope.size(); |
409 | |
410 | for(int s = vectorLen - 1; s >= 0; --s) |
411 | { |
412 | const QXmlName &nsb = scope.at(i: s); |
413 | |
414 | if(nsb.prefix() == StandardPrefixes::empty) |
415 | return nsb.namespaceURI() == nb.namespaceURI(); |
416 | } |
417 | } |
418 | } |
419 | else |
420 | { |
421 | for(int lvl = 0; lvl < levelLen; ++lvl) |
422 | { |
423 | const QVector<QXmlName> &scope = d->namespaces.at(i: lvl); |
424 | const int vectorLen = scope.size(); |
425 | |
426 | for(int s = 0; s < vectorLen; ++s) |
427 | { |
428 | const QXmlName &n = scope.at(i: s); |
429 | if (n.prefix() == nb.prefix() && |
430 | n.namespaceURI() == nb.namespaceURI()) |
431 | return true; |
432 | } |
433 | } |
434 | } |
435 | |
436 | return false; |
437 | } |
438 | |
439 | /*! |
440 | \reimp |
441 | */ |
442 | void QXmlSerializer::namespaceBinding(const QXmlName &nb) |
443 | { |
444 | /* |
445 | * Writes out \a nb. |
446 | * |
447 | * Namespace bindings aren't looked up in a cache, because |
448 | * we typically receive very few. |
449 | */ |
450 | |
451 | Q_D(QXmlSerializer); |
452 | Q_ASSERT_X(!nb.isNull(), Q_FUNC_INFO, |
453 | "It makes no sense to pass a null QXmlName." ); |
454 | |
455 | Q_ASSERT_X((nb.namespaceURI() != StandardNamespaces::empty) || |
456 | (nb.prefix() == StandardPrefixes::empty), |
457 | Q_FUNC_INFO, |
458 | "Undeclarations of prefixes aren't allowed in XML 1.0 " |
459 | "and aren't supposed to be received." ); |
460 | |
461 | if(nb.namespaceURI() == QPatternist::StandardNamespaces::StopNamespaceInheritance) |
462 | return; |
463 | |
464 | if(isBindingInScope(nb)) |
465 | return; |
466 | |
467 | d->namespaces.top().append(t: nb); |
468 | |
469 | if(nb.prefix() == StandardPrefixes::empty) |
470 | write(chars: " xmlns" ); |
471 | else |
472 | { |
473 | write(chars: " xmlns:" ); |
474 | write(content: d->np->stringForPrefix(code: nb.prefix())); |
475 | } |
476 | |
477 | write(chars: "=\"" ); |
478 | writeEscapedAttribute(toEscape: d->np->stringForNamespace(code: nb.namespaceURI())); |
479 | d->write(c: '"'); |
480 | } |
481 | |
482 | /*! |
483 | \reimp |
484 | */ |
485 | void QXmlSerializer::(const QString &value) |
486 | { |
487 | Q_D(QXmlSerializer); |
488 | Q_ASSERT_X(!value.contains(QLatin1String("--" )), |
489 | Q_FUNC_INFO, |
490 | "Invalid input; it's the caller's responsibility to ensure " |
491 | "the input is correct." ); |
492 | |
493 | startContent(); |
494 | write(chars: "<!--" ); |
495 | write(content: value); |
496 | write(chars: "-->" ); |
497 | d->isPreviousAtomic = false; |
498 | } |
499 | |
500 | /*! |
501 | \reimp |
502 | */ |
503 | void QXmlSerializer::characters(const QStringRef &value) |
504 | { |
505 | Q_D(QXmlSerializer); |
506 | d->isPreviousAtomic = false; |
507 | startContent(); |
508 | writeEscaped(toEscape: value.toString()); |
509 | } |
510 | |
511 | /*! |
512 | \reimp |
513 | */ |
514 | void QXmlSerializer::processingInstruction(const QXmlName &name, |
515 | const QString &value) |
516 | { |
517 | Q_D(QXmlSerializer); |
518 | Q_ASSERT_X(!value.contains(QLatin1String("?>" )), |
519 | Q_FUNC_INFO, |
520 | "Invalid input; it's the caller's responsibility to ensure " |
521 | "the input is correct." ); |
522 | |
523 | startContent(); |
524 | write(chars: "<?" ); |
525 | write(name); |
526 | d->write(c: ' '); |
527 | write(content: value); |
528 | write(chars: "?>" ); |
529 | |
530 | d->isPreviousAtomic = false; |
531 | } |
532 | |
533 | /*! |
534 | \internal |
535 | */ |
536 | void QXmlSerializer::item(const QPatternist::Item &outputItem) |
537 | { |
538 | Q_D(QXmlSerializer); |
539 | |
540 | if(outputItem.isAtomicValue()) |
541 | { |
542 | if(d->isPreviousAtomic) |
543 | { |
544 | startContent(); |
545 | d->write(c: ' '); |
546 | writeEscaped(toEscape: outputItem.stringValue()); |
547 | } |
548 | else |
549 | { |
550 | d->isPreviousAtomic = true; |
551 | const QString value(outputItem.stringValue()); |
552 | |
553 | if(!value.isEmpty()) |
554 | { |
555 | startContent(); |
556 | writeEscaped(toEscape: value); |
557 | } |
558 | } |
559 | } |
560 | else |
561 | { |
562 | startContent(); |
563 | Q_ASSERT(outputItem.isNode()); |
564 | sendAsNode(outputItem); |
565 | } |
566 | } |
567 | |
568 | /*! |
569 | \reimp |
570 | */ |
571 | void QXmlSerializer::atomicValue(const QVariant &value) |
572 | { |
573 | Q_UNUSED(value); |
574 | } |
575 | |
576 | /*! |
577 | \reimp |
578 | */ |
579 | void QXmlSerializer::startDocument() |
580 | { |
581 | Q_D(QXmlSerializer); |
582 | d->isPreviousAtomic = false; |
583 | } |
584 | |
585 | /*! |
586 | \reimp |
587 | */ |
588 | void QXmlSerializer::endDocument() |
589 | { |
590 | Q_D(QXmlSerializer); |
591 | d->isPreviousAtomic = false; |
592 | } |
593 | |
594 | /*! |
595 | |
596 | Returns a pointer to the output device. There is no corresponding |
597 | function to \e set the output device, because the output device must |
598 | be passed to the constructor. The serializer does not take ownership |
599 | of its IO device. |
600 | */ |
601 | QIODevice *QXmlSerializer::outputDevice() const |
602 | { |
603 | Q_D(const QXmlSerializer); |
604 | return d->device; |
605 | } |
606 | |
607 | /*! |
608 | Sets the codec the serializer will use for encoding its XML output. |
609 | The output codec is set to \a outputCodec. By default, the output |
610 | codec is set to the one for \c UTF-8. The serializer does not take |
611 | ownership of the codec. |
612 | |
613 | \sa codec() |
614 | |
615 | */ |
616 | void QXmlSerializer::setCodec(const QTextCodec *outputCodec) |
617 | { |
618 | Q_D(QXmlSerializer); |
619 | d->codec = outputCodec; |
620 | } |
621 | |
622 | /*! |
623 | Returns the codec being used by the serializer for encoding its |
624 | XML output. |
625 | |
626 | \sa setCodec() |
627 | */ |
628 | const QTextCodec *QXmlSerializer::codec() const |
629 | { |
630 | Q_D(const QXmlSerializer); |
631 | return d->codec; |
632 | } |
633 | |
634 | /*! |
635 | \reimp |
636 | */ |
637 | void QXmlSerializer::startOfSequence() |
638 | { |
639 | } |
640 | |
641 | /*! |
642 | \reimp |
643 | */ |
644 | void QXmlSerializer::endOfSequence() |
645 | { |
646 | /* If this function is changed to flush or close or something like that, |
647 | * take into consideration QXmlFormatter, especially |
648 | * QXmlFormatter::endOfSequence(). |
649 | */ |
650 | } |
651 | |
652 | QT_END_NAMESPACE |
653 | |