1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | // |
5 | // W A R N I N G |
6 | // ------------- |
7 | // |
8 | // This file is not part of the Qt API. It exists for the convenience |
9 | // of other Qt classes. This header file may change from version to |
10 | // version without notice, or even be removed. |
11 | // |
12 | // We mean it. |
13 | // |
14 | |
15 | #include <QtCore/private/qglobal_p.h> |
16 | #include <qstringconverter.h> |
17 | #include <qxmlstream.h> |
18 | #include "qxmlstreamgrammar_p.h" |
19 | #include <QtCore/qhash.h> |
20 | #include <QCoreApplication> // Q_DECLARE_TR_FUNCTIONS |
21 | |
22 | |
23 | #include <memory> |
24 | #include <optional> |
25 | |
26 | #ifndef QXMLSTREAM_P_H |
27 | #define QXMLSTREAM_P_H |
28 | |
29 | QT_BEGIN_NAMESPACE |
30 | |
31 | namespace QtPrivate { |
32 | |
33 | class XmlStringRef |
34 | { |
35 | public: |
36 | const QString *m_string = nullptr; |
37 | qsizetype m_pos = 0; |
38 | qsizetype m_size = 0; |
39 | |
40 | constexpr XmlStringRef() = default; |
41 | constexpr inline XmlStringRef(const QString *string, qsizetype pos, qsizetype length) |
42 | : m_string(string), m_pos(pos), m_size((Q_ASSERT(length >= 0), length)) |
43 | { |
44 | } |
45 | XmlStringRef(const QString *string) |
46 | : XmlStringRef(string, 0, string->size()) |
47 | { |
48 | } |
49 | |
50 | operator QXmlString() const { |
51 | if (!m_string) |
52 | return QXmlString(); |
53 | QStringPrivate d = m_string->data_ptr(); |
54 | d.setBegin(d.data() + m_pos); |
55 | d.size = m_size; |
56 | return QXmlString(std::move(d)); |
57 | } |
58 | |
59 | void clear() { m_string = nullptr; m_pos = 0; m_size= 0; } |
60 | QStringView view() const { return m_string ? QStringView(m_string->data() + m_pos, m_size) : QStringView(); } |
61 | bool isEmpty() const { return m_size == 0; } |
62 | bool isNull() const { return !m_string; } |
63 | QString toString() const { return view().toString(); } |
64 | |
65 | using value_type = QStringView::value_type; |
66 | using size_type = QStringView::size_type; |
67 | using difference_type = QStringView::difference_type; |
68 | using pointer = QStringView::pointer; |
69 | using const_pointer = QStringView::const_pointer; |
70 | using reference = QStringView::reference; |
71 | using const_reference = QStringView::const_reference; |
72 | using iterator = QStringView::iterator; |
73 | using const_iterator = QStringView::const_iterator; |
74 | using reverse_iterator = QStringView::reverse_iterator; |
75 | using const_reverse_iterator = QStringView::const_reverse_iterator; |
76 | |
77 | #define MAKE_MEMBER(name) \ |
78 | auto name () const noexcept { return view(). name (); } |
79 | MAKE_MEMBER(data) |
80 | MAKE_MEMBER(size) |
81 | MAKE_MEMBER(empty) |
82 | MAKE_MEMBER(begin) |
83 | MAKE_MEMBER(end) |
84 | MAKE_MEMBER(cbegin) |
85 | MAKE_MEMBER(cend) |
86 | MAKE_MEMBER(rbegin) |
87 | MAKE_MEMBER(rend) |
88 | MAKE_MEMBER(crbegin) |
89 | MAKE_MEMBER(crend) |
90 | #undef MAKE_MEMBER |
91 | |
92 | #define MAKE_OP(op) \ |
93 | friend auto operator op(const XmlStringRef &lhs, const XmlStringRef &rhs) noexcept { return lhs.view() op rhs.view(); } \ |
94 | /*end*/ |
95 | MAKE_OP(==) |
96 | MAKE_OP(!=) |
97 | MAKE_OP(<=) |
98 | MAKE_OP(>=) |
99 | MAKE_OP(<) |
100 | MAKE_OP(>) |
101 | #undef MAKE_OP |
102 | #define MAKE_OP(op) \ |
103 | friend auto operator op(const XmlStringRef &lhs, QStringView rhs) noexcept { return lhs.view() op rhs; } \ |
104 | friend auto operator op(QStringView lhs, const XmlStringRef &rhs) noexcept { return lhs op rhs.view(); } \ |
105 | /*end*/ |
106 | MAKE_OP(==) |
107 | MAKE_OP(!=) |
108 | MAKE_OP(<=) |
109 | MAKE_OP(>=) |
110 | MAKE_OP(<) |
111 | MAKE_OP(>) |
112 | #undef MAKE_OP |
113 | }; |
114 | |
115 | } |
116 | |
117 | using namespace QtPrivate; |
118 | |
119 | template <typename T> class QXmlStreamSimpleStack |
120 | { |
121 | Q_DISABLE_COPY_MOVE(QXmlStreamSimpleStack) |
122 | |
123 | T *data; |
124 | qsizetype tos, cap; |
125 | public: |
126 | inline QXmlStreamSimpleStack() |
127 | : data(nullptr), tos(-1), cap(0) |
128 | {} |
129 | inline ~QXmlStreamSimpleStack() |
130 | { |
131 | if (data) { |
132 | std::destroy_n(data, size()); |
133 | free(data); |
134 | } |
135 | } |
136 | |
137 | inline void reserve(qsizetype ) |
138 | { |
139 | if (tos + extraCapacity + 1 > cap) { |
140 | cap = qMax(a: tos + extraCapacity + 1, b: cap << 1 ); |
141 | void *ptr = realloc(ptr: static_cast<void *>(data), size: cap * sizeof(T)); |
142 | data = reinterpret_cast<T *>(ptr); |
143 | Q_CHECK_PTR(data); |
144 | } |
145 | } |
146 | |
147 | inline T &push() { reserve(extraCapacity: 1); return rawPush(); } |
148 | inline T &rawPush() { return *new (data + (++tos)) T; } |
149 | inline const T &top() const { return data[tos]; } |
150 | inline T &top() { return data[tos]; } |
151 | inline T pop() { T t = std::move(data[tos]); std::destroy_at(data + tos); --tos; return t; } |
152 | inline T &operator[](qsizetype index) { return data[index]; } |
153 | inline const T &at(qsizetype index) const { return data[index]; } |
154 | inline qsizetype size() const { return tos + 1; } |
155 | inline void resize(qsizetype s) { tos = s - 1; } |
156 | inline bool isEmpty() const { return tos < 0; } |
157 | inline void clear() { tos = -1; } |
158 | |
159 | using const_iterator = const T*; |
160 | using iterator = T*; |
161 | T *begin() { return data; } |
162 | const T *begin() const { return data; } |
163 | const T *cbegin() const { return begin(); } |
164 | T *end() { return data + size(); } |
165 | const T *end() const { return data + size(); } |
166 | const T *cend() const { return end(); } |
167 | }; |
168 | |
// Helper class that only carries the translation functions declared by
// Q_DECLARE_TR_FUNCTIONS, so that messages in this file can be written as
// QXmlStream::tr("...").
class QXmlStream
{
    Q_DECLARE_TR_FUNCTIONS(QXmlStream)
};
173 | |
174 | class QXmlStreamPrivateTagStack { |
175 | public: |
176 | struct NamespaceDeclaration |
177 | { |
178 | XmlStringRef prefix; |
179 | XmlStringRef namespaceUri; |
180 | }; |
181 | |
182 | struct Tag |
183 | { |
184 | XmlStringRef name; |
185 | XmlStringRef qualifiedName; |
186 | NamespaceDeclaration namespaceDeclaration; |
187 | qsizetype tagStackStringStorageSize; |
188 | qsizetype namespaceDeclarationsSize; |
189 | }; |
190 | |
191 | |
192 | QXmlStreamPrivateTagStack(); |
193 | QXmlStreamSimpleStack<NamespaceDeclaration> namespaceDeclarations; |
194 | QString tagStackStringStorage; |
195 | qsizetype tagStackStringStorageSize; |
196 | qsizetype initialTagStackStringStorageSize; |
197 | bool tagsDone; |
198 | |
199 | XmlStringRef addToStringStorage(QAnyStringView s) |
200 | { |
201 | qsizetype pos = tagStackStringStorageSize; |
202 | if (pos != tagStackStringStorage.size()) |
203 | tagStackStringStorage.resize(size: pos); |
204 | s.visit(v: [&](auto s) { tagStackStringStorage.append(s); }); |
205 | qsizetype sz = (tagStackStringStorage.size() - pos); |
206 | tagStackStringStorageSize += sz; |
207 | return XmlStringRef(&tagStackStringStorage, pos, sz); |
208 | } |
209 | |
210 | QXmlStreamSimpleStack<Tag> tagStack; |
211 | |
212 | |
213 | inline Tag tagStack_pop() { |
214 | Tag tag = tagStack.pop(); |
215 | tagStackStringStorageSize = tag.tagStackStringStorageSize; |
216 | namespaceDeclarations.resize(s: tag.namespaceDeclarationsSize); |
217 | tagsDone = tagStack.isEmpty(); |
218 | return tag; |
219 | } |
220 | inline Tag &tagStack_push() { |
221 | Tag &tag = tagStack.push(); |
222 | tag.tagStackStringStorageSize = tagStackStringStorageSize; |
223 | tag.namespaceDeclarationsSize = namespaceDeclarations.size(); |
224 | return tag; |
225 | } |
226 | }; |
227 | |
228 | |
229 | class QXmlStreamEntityResolver; |
230 | class QXmlStreamReaderPrivate : public QXmlStreamGrammar, public QXmlStreamPrivateTagStack |
231 | { |
232 | QXmlStreamReader *q_ptr; |
233 | Q_DECLARE_PUBLIC(QXmlStreamReader) |
234 | public: |
235 | QXmlStreamReaderPrivate(QXmlStreamReader *q); |
236 | ~QXmlStreamReaderPrivate(); |
237 | void init(); |
238 | |
239 | QByteArray rawReadBuffer; |
240 | QByteArray dataBuffer; |
241 | uchar firstByte; |
242 | qint64 nbytesread; |
243 | QString readBuffer; |
244 | qsizetype readBufferPos; |
245 | QXmlStreamSimpleStack<uint> putStack; |
246 | struct Entity { |
247 | Entity() = default; |
248 | Entity(const QString &name, const QString &value) |
249 | : name(name), value(value), external(false), unparsed(false), literal(false), |
250 | hasBeenParsed(false), isCurrentlyReferenced(false){} |
251 | static inline Entity createLiteral(QLatin1StringView name, QLatin1StringView value) |
252 | { Entity result(name, value); result.literal = result.hasBeenParsed = true; return result; } |
253 | QString name, value; |
254 | uint external : 1; |
255 | uint unparsed : 1; |
256 | uint literal : 1; |
257 | uint hasBeenParsed : 1; |
258 | uint isCurrentlyReferenced : 1; |
259 | }; |
260 | // these hash tables use a QStringView as a key to avoid creating QStrings |
261 | // just for lookup. The keys are usually views into Entity::name and thus |
262 | // are guaranteed to have the same lifetime as the referenced data: |
263 | QHash<QStringView, Entity> entityHash; |
264 | QHash<QStringView, Entity> parameterEntityHash; |
265 | struct QEntityReference |
266 | { |
267 | QHash<QStringView, Entity> *hash; |
268 | QStringView name; |
269 | }; |
270 | QXmlStreamSimpleStack<QEntityReference> entityReferenceStack; |
271 | int entityExpansionLimit = 4096; |
272 | int entityLength = 0; |
273 | inline bool referenceEntity(QHash<QStringView, Entity> *hash, Entity &entity) |
274 | { |
275 | Q_ASSERT(hash); |
276 | if (entity.isCurrentlyReferenced) { |
277 | raiseWellFormedError(message: QXmlStream::tr(sourceText: "Self-referencing entity detected." )); |
278 | return false; |
279 | } |
280 | // entityLength represents the amount of additional characters the |
281 | // entity expands into (can be negative for e.g. &). It's used to |
282 | // avoid DoS attacks through recursive entity expansions |
283 | entityLength += entity.value.size() - entity.name.size() - 2; |
284 | if (entityLength > entityExpansionLimit) { |
285 | raiseWellFormedError(message: QXmlStream::tr(sourceText: "Entity expands to more characters than the entity expansion limit." )); |
286 | return false; |
287 | } |
288 | entity.isCurrentlyReferenced = true; |
289 | entityReferenceStack.push() = { .hash: hash, .name: entity.name }; |
290 | injectToken(tokenToInject: ENTITY_DONE); |
291 | return true; |
292 | } |
293 | |
294 | |
295 | QIODevice *device; |
296 | bool deleteDevice; |
297 | QStringDecoder decoder; |
298 | bool atEnd; |
299 | |
300 | enum class XmlContext |
301 | { |
302 | Prolog, |
303 | Body, |
304 | }; |
305 | |
306 | XmlContext currentContext = XmlContext::Prolog; |
307 | bool foundDTD = false; |
308 | bool isValidToken(QXmlStreamReader::TokenType type); |
309 | void checkToken(); |
310 | |
311 | /*! |
312 | \sa setType() |
313 | */ |
314 | QXmlStreamReader::TokenType type; |
315 | QXmlStreamReader::Error error; |
316 | QString errorString; |
317 | QString unresolvedEntity; |
318 | |
319 | qint64 lineNumber, lastLineStart, characterOffset; |
320 | |
321 | |
322 | void write(const QString &); |
323 | void write(const char *); |
324 | |
325 | |
326 | QXmlStreamAttributes attributes; |
327 | XmlStringRef namespaceForPrefix(QStringView prefix); |
328 | void resolveTag(); |
329 | void resolvePublicNamespaces(); |
330 | void resolveDtd(); |
331 | uint resolveCharRef(int symbolIndex); |
332 | bool checkStartDocument(); |
333 | void startDocument(); |
334 | void parseError(); |
335 | void checkPublicLiteral(QStringView publicId); |
336 | |
337 | bool scanDtd; |
338 | XmlStringRef lastAttributeValue; |
339 | bool lastAttributeIsCData; |
340 | struct DtdAttribute { |
341 | XmlStringRef tagName; |
342 | XmlStringRef attributeQualifiedName; |
343 | XmlStringRef attributePrefix; |
344 | XmlStringRef attributeName; |
345 | XmlStringRef defaultValue; |
346 | bool isCDATA; |
347 | bool isNamespaceAttribute; |
348 | }; |
349 | QXmlStreamSimpleStack<DtdAttribute> dtdAttributes; |
350 | struct NotationDeclaration { |
351 | XmlStringRef name; |
352 | XmlStringRef publicId; |
353 | XmlStringRef systemId; |
354 | }; |
355 | QXmlStreamSimpleStack<NotationDeclaration> notationDeclarations; |
356 | QXmlStreamNotationDeclarations publicNotationDeclarations; |
357 | QXmlStreamNamespaceDeclarations publicNamespaceDeclarations; |
358 | |
359 | struct EntityDeclaration { |
360 | XmlStringRef name; |
361 | XmlStringRef notationName; |
362 | XmlStringRef publicId; |
363 | XmlStringRef systemId; |
364 | XmlStringRef value; |
365 | bool parameter; |
366 | bool external; |
367 | inline void clear() { |
368 | name.clear(); |
369 | notationName.clear(); |
370 | publicId.clear(); |
371 | systemId.clear(); |
372 | value.clear(); |
373 | parameter = external = false; |
374 | } |
375 | }; |
376 | QXmlStreamSimpleStack<EntityDeclaration> entityDeclarations; |
377 | QXmlStreamEntityDeclarations publicEntityDeclarations; |
378 | |
379 | XmlStringRef text; |
380 | |
381 | XmlStringRef prefix, namespaceUri, qualifiedName, name; |
382 | XmlStringRef processingInstructionTarget, processingInstructionData; |
383 | XmlStringRef dtdName, dtdPublicId, dtdSystemId; |
384 | XmlStringRef documentVersion, documentEncoding; |
385 | uint isEmptyElement : 1; |
386 | uint isWhitespace : 1; |
387 | uint isCDATA : 1; |
388 | uint standalone : 1; |
389 | uint hasCheckedStartDocument : 1; |
390 | uint normalizeLiterals : 1; |
391 | uint hasSeenTag : 1; |
392 | uint inParseEntity : 1; |
393 | uint referenceToUnparsedEntityDetected : 1; |
394 | uint referenceToParameterEntityDetected : 1; |
395 | uint hasExternalDtdSubset : 1; |
396 | uint lockEncoding : 1; |
397 | uint namespaceProcessing : 1; |
398 | uint hasStandalone : 1; // TODO: expose in public API |
399 | |
400 | int resumeReduction; |
401 | void resume(int rule); |
402 | |
403 | inline bool entitiesMustBeDeclared() const { |
404 | return (!inParseEntity |
405 | && (standalone |
406 | || (!referenceToUnparsedEntityDetected |
407 | && !referenceToParameterEntityDetected // Errata 13 as of 2006-04-25 |
408 | && !hasExternalDtdSubset))); |
409 | } |
410 | |
411 | // qlalr parser |
412 | int tos; |
413 | int stack_size; |
414 | struct Value { |
415 | qsizetype pos; // offset into textBuffer |
416 | qsizetype len; // length incl. prefix (if any) |
417 | qint16 prefix; // prefix of a name (as in "prefix:name") limited to 4k in fastScanName() |
418 | ushort c; |
419 | }; |
420 | |
421 | Value *sym_stack; |
422 | int *state_stack; |
423 | inline void reallocateStack(); |
424 | inline Value &sym(int index) const |
425 | { return sym_stack[tos + index - 1]; } |
426 | QString textBuffer; |
427 | inline void clearTextBuffer() { |
428 | if (!scanDtd) { |
429 | textBuffer.resize(size: 0); |
430 | textBuffer.reserve(asize: 256); |
431 | } |
432 | } |
433 | struct Attribute { |
434 | Value key; |
435 | Value value; |
436 | }; |
437 | QXmlStreamSimpleStack<Attribute> attributeStack; |
438 | |
439 | inline XmlStringRef symString(int index) { |
440 | const Value &symbol = sym(index); |
441 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix); |
442 | } |
443 | QStringView symView(int index) const |
444 | { |
445 | const Value &symbol = sym(index); |
446 | return QStringView(textBuffer.data() + symbol.pos, symbol.len).mid(pos: symbol.prefix); |
447 | } |
448 | inline XmlStringRef symName(int index) { |
449 | const Value &symbol = sym(index); |
450 | return XmlStringRef(&textBuffer, symbol.pos, symbol.len); |
451 | } |
452 | inline XmlStringRef symString(int index, int offset) { |
453 | const Value &symbol = sym(index); |
454 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix + offset, symbol.len - symbol.prefix - offset); |
455 | } |
456 | inline XmlStringRef symPrefix(int index) { |
457 | const Value &symbol = sym(index); |
458 | if (symbol.prefix) |
459 | return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1); |
460 | return XmlStringRef(); |
461 | } |
462 | inline XmlStringRef symString(const Value &symbol) { |
463 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix); |
464 | } |
465 | inline XmlStringRef symName(const Value &symbol) { |
466 | return XmlStringRef(&textBuffer, symbol.pos, symbol.len); |
467 | } |
468 | inline XmlStringRef symPrefix(const Value &symbol) { |
469 | if (symbol.prefix) |
470 | return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1); |
471 | return XmlStringRef(); |
472 | } |
473 | |
474 | inline void clearSym() { Value &val = sym(index: 1); val.pos = textBuffer.size(); val.len = 0; } |
475 | |
476 | |
477 | short token; |
478 | uint token_char; |
479 | |
480 | uint filterCarriageReturn(); |
481 | inline uint getChar(); |
482 | inline uint peekChar(); |
483 | inline void putChar(uint c) { putStack.push() = c; } |
484 | inline void putChar(QChar c) { putStack.push() = c.unicode(); } |
485 | void putString(QStringView s, qsizetype from = 0); |
486 | void putStringLiteral(QStringView s); |
487 | void putReplacement(QStringView s); |
488 | void putReplacementInAttributeValue(QStringView s); |
489 | uint getChar_helper(); |
490 | |
491 | bool scanUntil(const char *str, short tokenToInject = -1); |
492 | bool scanString(const char *str, short tokenToInject, bool requireSpace = true); |
493 | inline void injectToken(ushort tokenToInject) { |
494 | putChar(c: int(tokenToInject) << 16); |
495 | } |
496 | |
497 | QString resolveUndeclaredEntity(const QString &name); |
498 | void parseEntity(const QString &value); |
499 | std::unique_ptr<QXmlStreamReaderPrivate> entityParser; |
500 | |
501 | bool scanAfterLangleBang(); |
502 | bool scanPublicOrSystem(); |
503 | bool scanNData(); |
504 | bool scanAfterDefaultDecl(); |
505 | bool scanAttType(); |
506 | |
507 | |
508 | // scan optimization functions. Not strictly necessary but LALR is |
509 | // not very well suited for scanning fast |
510 | qsizetype fastScanLiteralContent(); |
511 | qsizetype fastScanSpace(); |
512 | qsizetype fastScanContentCharList(); |
513 | std::optional<qsizetype> fastScanName(Value *val = nullptr); |
514 | inline qsizetype fastScanNMTOKEN(); |
515 | |
516 | |
517 | bool parse(); |
518 | inline void consumeRule(int); |
519 | |
520 | void raiseError(QXmlStreamReader::Error error, const QString& message = QString()); |
521 | void raiseWellFormedError(const QString &message); |
522 | void raiseNamePrefixTooLongError(); |
523 | |
524 | QXmlStreamEntityResolver *entityResolver; |
525 | |
526 | private: |
527 | /*! \internal |
528 | Never assign to variable type directly. Instead use this function. |
529 | |
530 | This prevents errors from being ignored. |
531 | */ |
532 | inline void setType(const QXmlStreamReader::TokenType t) |
533 | { |
534 | if (type != QXmlStreamReader::Invalid) |
535 | type = t; |
536 | } |
537 | }; |
538 | |
539 | QT_END_NAMESPACE |
540 | |
541 | #endif // QXMLSTREAM_P_H |
542 | |
543 | |