1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | // |
41 | // W A R N I N G |
42 | // ------------- |
43 | // |
44 | // This file is not part of the Qt API. It exists purely as an |
45 | // implementation detail. This header file may change from version to |
46 | // version without notice, or even be removed. |
47 | // |
48 | // We mean it. |
49 | |
50 | #ifndef Patternist_XSLTTokenizer_H |
51 | #define Patternist_XSLTTokenizer_H |
52 | |
53 | #include <QQueue> |
54 | #include <QStack> |
55 | #include <QUrl> |
56 | |
57 | #include <private/qmaintainingreader_p.h> |
58 | #include <private/qreportcontext_p.h> |
59 | #include <private/qtokenizer_p.h> |
60 | #include <private/qxslttokenlookup_p.h> |
61 | |
62 | QT_BEGIN_NAMESPACE |
63 | |
64 | namespace QPatternist |
65 | { |
66 | /** |
67 | * @short A TokenSource which contains one Tokenizer::Token. |
68 | * |
69 | * One possible way to optimize this is to let SingleTokenContainer |
70 | * actually contain a list of tokens, such that XSLTTokenizer::queueToken() |
71 | * could append to that, instead of instantiating a SingleTokenContainer |
72 | * all the time. |
73 | * |
74 | * @author Frans Englich <frans.englich@nokia.com> |
75 | */ |
76 | class SingleTokenContainer : public TokenSource |
77 | { |
78 | public: |
79 | inline SingleTokenContainer(const Tokenizer::Token &token, |
80 | const XPATHLTYPE &location); |
81 | |
82 | virtual Tokenizer::Token nextToken(XPATHLTYPE *const sourceLocator); |
83 | private: |
84 | const Tokenizer::Token m_token; /**< Copy of the @c token passed to the constructor. */ |
85 | const XPATHLTYPE m_location; /**< Copy of the @c location passed to the constructor. */ |
86 | bool m_hasDelivered; /**< Starts out @c false. NOTE(review): presumably set once nextToken() has handed out m_token; nextToken() is defined elsewhere, confirm. */ |
87 | }; |
88 | |
89 | SingleTokenContainer::SingleTokenContainer(const Tokenizer::Token &token, |
90 | const XPATHLTYPE &location) |
91 | : m_token(token) |
92 | , m_location(location) |
93 | , m_hasDelivered(false) |
94 | { /* Nothing to do here: every member is set by the initializer list above. */ |
95 | } |
96 | |
97 | /** |
98 | * @short Tokenizes XSL-T 2.0 documents. |
99 | * |
100 | * XSLTTokenizer takes in its constructor a pointer to a QIODevice which is |
101 | * supposed to contain an XSL-T document. XSLTTokenizer then rewrites that |
102 | * document into XQuery tokens delivered via nextToken(), which the regular |
103 | * XQuery parser then reads. Hence, the XSL-T language is rewritten into |
104 | * XQuery code, slightly extended to handle the features specific to |
105 | * XSL-T. |
106 | * |
107 | * @author Frans Englich <frans.englich@nokia.com> |
108 | */ |
109 | class XSLTTokenizer : public Tokenizer |
110 | , private MaintainingReader<XSLTTokenLookup> |
111 | { |
112 | public: |
113 | /** |
114 | * XSLTTokenizer does not own @p queryDevice. |
115 | */ |
116 | XSLTTokenizer(QIODevice *const queryDevice, |
117 | const QUrl &location, |
118 | const ReportContext::Ptr &context, |
119 | const NamePool::Ptr &np); |
120 | |
121 | virtual Token nextToken(XPATHLTYPE *const sourceLocator); |
122 | |
123 | /** |
124 | * For XSLT we don't need this mechanism, so we do nothing. |
125 | */ |
126 | virtual int commenceScanOnly(); |
127 | |
128 | /** |
129 | * For XSLT we don't need this mechanism, so we do nothing. |
130 | */ |
131 | virtual void resumeTokenizationFrom(const int position); |
132 | |
133 | virtual void setParserContext(const ParserContext::Ptr &parseInfo); |
134 | |
135 | virtual QUrl documentURI() const |
136 | { |
137 | return queryURI(); |
138 | } |
139 | |
140 | protected: |
141 | virtual bool isAnyAttributeAllowed() const; |
142 | |
143 | private: |
144 | inline void validateElement() const; |
145 | |
146 | XPATHLTYPE currentSourceLocator() const; |
147 | |
148 | enum State |
149 | { |
150 | OutsideDocumentElement, |
151 | InsideStylesheetModule, |
152 | InsideSequenceConstructor |
153 | }; |
154 | |
155 | enum VariableType |
156 | { |
157 | FunctionParameter, |
158 | GlobalParameter, |
159 | TemplateParameter, |
160 | VariableDeclaration, |
161 | VariableInstruction, |
162 | WithParamVariable |
163 | }; |
164 | |
165 | void queueNamespaceDeclarations(TokenSource::Queue *const ts, |
166 | QStack<Token> *const target, |
167 | const bool isDeclaration = false); |
168 | |
169 | inline void queueToken(const Token &token, |
170 | TokenSource::Queue *const ts); |
171 | void queueEmptySequence(TokenSource::Queue *const to); |
172 | void queueSequenceType(const QString &expr); |
173 | /** |
174 | * If @p emptynessAllowed is @c true, the @c select attribute may |
175 | * be empty while there also is no sequence constructor. |
176 | */ |
177 | void queueSimpleContentConstructor(const ReportContext::ErrorCode code, |
178 | const bool emptynessAllowed, |
179 | TokenSource::Queue *const to, |
180 | const bool selectOnlyFirst = false); |
181 | /** |
182 | * Tokenizes and queues @p expr as if it were an attribute value |
183 | * template. |
184 | */ |
185 | void queueAVT(const QString &expr, |
186 | TokenSource::Queue *const to); |
187 | |
188 | void hasWrittenExpression(bool &beacon); |
189 | void commencingExpression(bool &hasWrittenExpression, |
190 | TokenSource::Queue *const to); |
191 | |
192 | void outsideDocumentElement(); |
193 | void insideChoose(TokenSource::Queue *const to); |
194 | void insideFunction(); |
195 | |
196 | bool attributeYesNo(const QString &localName) const; |
197 | |
198 | /** |
199 | * Scans/skips @c xsl:fallback elements only. This is the case of the |
200 | * children of @c xsl:sequence, for instance. |
201 | */ |
202 | void parseFallbacksOnly(); |
203 | |
204 | /** |
205 | * Returns true if the current element is either @c stylesheet |
206 | * or the synonym @c transform. |
207 | * |
208 | * This function assumes that m_reader is positioned at an element |
209 | * and that the namespace is XSL-T. |
210 | */ |
211 | bool isStylesheetElement() const; |
212 | |
213 | /** |
214 | * Returns true if the current element name is @p name. |
215 | * |
216 | * It is assumed that the namespace is XSL-T and that the current |
217 | * state in m_reader is either QXmlStreamReader::StartElement or |
218 | * QXmlStreamReader::EndElement. |
219 | */ |
220 | bool isElement(const NodeName &name) const; |
221 | |
222 | /** |
223 | * Queues a text constructor for @p chars, if @p chars is |
224 | * not empty. |
225 | */ |
226 | void queueTextConstructor(QString &chars, |
227 | bool &hasWrittenExpression, |
228 | TokenSource::Queue *const to); |
229 | |
230 | /** |
231 | * |
232 | * @see <a href="http://www.w3.org/TR/xslt20/#stylesheet-structure">XSL |
233 | * Transformations (XSLT) Version 2, 3.6 Stylesheet Element</a> |
234 | */ |
235 | void insideStylesheetModule(); |
236 | void insideTemplate(); |
237 | |
238 | /** |
239 | * Takes @p expr for an XPath expression, and pushes the necessary |
240 | * things for having it delivered as a stream of tokens, appropriate |
241 | * for Effective Boolean Value parsing. |
242 | */ |
243 | void queueExpression(const QString &expr, |
244 | TokenSource::Queue *const to, |
245 | const bool wrapWithParantheses = true); |
246 | |
247 | void skipBodyOfParam(const ReportContext::ErrorCode code); |
248 | |
249 | void queueParams(const NodeName parentName, |
250 | TokenSource::Queue *const to); |
251 | |
252 | /** |
253 | * Used for @c xsl:apply-templates and @c xsl:call-template. |
254 | */ |
255 | void queueWithParams(const NodeName parentName, |
256 | TokenSource::Queue *const to, |
257 | const bool initialAdvance = true); |
258 | |
259 | /** |
260 | * Queues a variable declaration of the kind given by @p variableType, |
261 | * see VariableType. It can, for instance, be an @c xsl:variable |
262 | * instruction or a top-level declaration element. |
263 | */ |
264 | void queueVariableDeclaration(const VariableType variableType, |
265 | TokenSource::Queue *const to); |
266 | |
267 | /** |
268 | * Skips the current sub-tree. |
269 | * |
270 | * If text nodes that aren't strippable whitespace, or elements are |
271 | * encountered, @c true is returned, otherwise @c false. |
272 | * |
273 | * If @p exitOnContent is @c true, this function exits immediately |
274 | * if content is encountered for which it would return @c false. NOTE(review): the paragraph above says such content yields @c true; confirm which is meant. |
275 | */ |
276 | bool skipSubTree(const bool exitOnContent = false); |
277 | |
278 | /** |
279 | * Queues the necessary tokens for the expression that is either |
280 | * supplied using a @c select attribute or a sequence constructor, |
281 | * while doing the necessary error handling for ensuring they are |
282 | * mutually exclusive. |
283 | * |
284 | * It is assumed that the current state of m_reader is |
285 | * QXmlStreamReader::StartElement, or that the attributes for the |
286 | * element are supplied through @p atts. This function advances m_reader |
287 | * up until the corresponding QXmlStreamReader::EndElement. |
288 | * |
289 | * If @p emptynessAllowed is @c false, the element must either have a |
290 | * sequence constructor or a @c select attribute. If @c true, both may |
291 | * be absent. |
292 | * |
293 | * Returns @c true if the queued expression was supplied through the |
294 | * @c select attribute otherwise @c false. |
295 | */ |
296 | bool queueSelectOrSequenceConstructor(const ReportContext::ErrorCode code, |
297 | const bool emptynessAllowed, |
298 | TokenSource::Queue *const to, |
299 | const QXmlStreamAttributes *const atts = 0, |
300 | const bool queueEmptyOnEmpty = true); |
301 | |
302 | /** |
303 | * If @p initialAdvance is @c true, insideSequenceConstructor() will |
304 | * advance m_reader, otherwise it won't. Not doing so is useful |
305 | * when the caller is already inside a sequence constructor. |
306 | * |
307 | * Returns @c true if a sequence constructor was found and queued. |
308 | * Returns @c false if none was found, and the empty sequence was |
309 | * synthesized. |
310 | */ |
311 | bool insideSequenceConstructor(TokenSource::Queue *const to, |
312 | const bool initialAdvance = true, |
313 | const bool queueEmptyOnEmpty = true); |
314 | |
315 | bool insideSequenceConstructor(TokenSource::Queue *const to, |
316 | QStack<Token> &queueOnExit, |
317 | const bool initialAdvance = true, |
318 | const bool queueEmptyOnEmpty = true); |
319 | |
320 | void insideAttributeSet(); |
321 | void pushState(const State nextState); |
322 | void leaveState(); |
323 | |
324 | /** |
325 | * @short Handles @c xml:space and standard attributes. |
326 | * |
327 | * If @p isXSLTElement is @c true, the current element is an XSL-T |
328 | * element, as opposed to a Literal Result Element. |
329 | * |
330 | * handleStandardAttributes() must be called before validateElement(), |
331 | * because the former determines the version in use, and |
332 | * validateElement() depends on that. |
333 | * |
334 | * The core of this function can't be run many times because it pushes |
335 | * whitespace handling onto m_stripWhitespace. |
336 | * m_hasHandledStandardAttributes guards against this (it is not declared in this class; presumably inherited from MaintainingReader). |
337 | * |
338 | * @see validateElement() |
339 | * @see <a href="http://www.w3.org/TR/xslt20/#standard-attributes">XSL |
340 | * Transformations (XSLT) Version 2.0, 3.5 Standard Attributes</a> |
341 | */ |
342 | void handleStandardAttributes(const bool isXSLTElement); |
343 | |
344 | /** |
345 | * @short Sends the tokens in @p source to @p destination. |
346 | */ |
347 | inline void queueOnExit(QStack<Token> &source, |
348 | TokenSource::Queue *const destination); |
349 | |
350 | /** |
351 | * Handles the @c type and @c validation attributes on instructions and |
352 | * literal result elements. |
353 | * |
354 | * @p isLRE should be true if the current element is not in the XSL-T |
355 | * namespace, that is if it's a Literal Result Element. |
356 | * |
357 | * @see <a href="http://www.w3.org/TR/xslt20/#validation">XSL |
358 | * Transformations (XSLT) Version 2.0, 19.2 Validation</a> |
359 | */ |
360 | void handleValidationAttributes(const bool isLRE) const; |
361 | |
362 | void unexpectedContent(const ReportContext::ErrorCode code = ReportContext::XTSE0010) const; |
363 | |
364 | void checkForParseError() const; |
365 | |
366 | inline void startStorageOfCurrent(TokenSource::Queue *const to); |
367 | inline void endStorageOfCurrent(TokenSource::Queue *const to); |
368 | |
369 | /** |
370 | * Checks that the version attribute has a value in accordance with what |
371 | * is allowed and supported. NOTE(review): this text used to refer to a parameter named attribute, which this function doesn't have; presumably the XSL-T @c version attribute is meant. Confirm. |
372 | */ |
373 | void handleXSLTVersion(TokenSource::Queue *const to, |
374 | QStack<Token> *const queueOnExit, |
375 | const bool isXSLTElement, |
376 | const QXmlStreamAttributes *atts = 0, |
377 | const bool generateCode = true, |
378 | const bool setGlobalVersion = false); |
379 | |
380 | /** |
381 | * @short Generates code for reflecting @c xml:base attributes. |
382 | */ |
383 | void handleXMLBase(TokenSource::Queue *const to, |
384 | QStack<Token> *const queueOnExit, |
385 | const bool isInstruction = true, |
386 | const QXmlStreamAttributes *atts = 0); |
387 | |
388 | /** |
389 | * Concatenates text nodes, ignores comments and processing |
390 | * instructions, and raises errors on everything else. |
391 | * |
392 | * Hence, similar to QXmlStreamReader::readElementText(), except |
393 | * for error handling. |
394 | */ |
395 | QString readElementText(); |
396 | |
397 | /** |
398 | * Tokenizes and validates @c xsl:sort elements, if any, until |
399 | * other content is encountered. NOTE(review): this text used to say the produced |
400 | * tokens are returned in a list, but the function returns @c void; presumably they are queued onto @p to. Confirm. |
401 | * |
402 | * If @p oneSortRequired is @c true, at least one @c sort element must appear, |
403 | * otherwise an error is raised. |
404 | * |
405 | * If @p speciallyTreatWhitespace is @c true, whitespace will be treated as if it |
406 | * was one of the elements mentioned in step 4 in section 4.2 Stripping |
407 | * Whitespace from the Stylesheet. |
408 | */ |
409 | void queueSorting(const bool oneSortRequired, |
410 | TokenSource::Queue *const to, |
411 | const bool speciallyTreatWhitespace = false); |
412 | |
413 | static ElementDescription<XSLTTokenLookup>::Hash createElementDescriptions(); |
414 | static QHash<QString, int> createValidationAlternatives(); |
415 | static QSet<NodeName> createStandardAttributes(); |
416 | |
417 | /** |
418 | * Reads the attribute by name @p attributeName, and returns @c true if |
419 | * its value is @p isTrue, @c false if it is @p isFalse, and raises an |
420 | * error otherwise. |
421 | */ |
422 | bool readToggleAttribute(const QString &attributeName, |
423 | const QString &isTrue, |
424 | const QString &isFalse, |
425 | const QXmlStreamAttributes *const atts = 0) const; |
426 | |
427 | int readAlternativeAttribute(const QHash<QString, int> &alternatives, |
428 | const QXmlStreamAttribute &attr) const; |
429 | |
430 | /** |
431 | * Returns @c true if the current text node can be skipped without |
432 | * it leading to a validation error, with respect to whitespace. |
433 | */ |
434 | inline bool whitespaceToSkip() const; |
435 | |
436 | const QUrl m_location; |
437 | const NamePool::Ptr m_namePool; |
438 | QStack<State> m_state; |
439 | TokenSource::Queue m_tokenSource; |
440 | |
441 | enum ProcessMode |
442 | { |
443 | BackwardsCompatible, |
444 | ForwardCompatible, |
445 | NormalProcessing |
446 | }; |
447 | |
448 | /** |
449 | * Whether we're processing in Forwards-Compatible, |
450 | * Backwards-Compatible or normal mode, see ProcessMode. |
451 | * |
452 | * This is set by handleStandardAttributes(). |
453 | * |
454 | * ParserContext has similar information in |
455 | * ParserContext::isBackwardsCompat. A big distinction is that both the |
456 | * tokenizer and the parser buffer tokens and have positions disjoint |
457 | * to each other. E.g., the state the parser has when reducing into |
458 | * non-terminals, is different from the tokenizer's. |
459 | */ |
460 | QStack<ProcessMode> m_processingMode; |
461 | |
462 | /** |
463 | * Returns @c true if the current state in m_reader is in the XSLT |
464 | * namespace. It is assumed that the current state is an element. |
465 | */ |
466 | inline bool isXSLT() const; |
467 | |
468 | const QHash<QString, int> m_validationAlternatives; |
469 | |
470 | ParserContext::Ptr m_parseInfo; |
471 | }; |
472 | } |
473 | |
474 | QT_END_NAMESPACE |
475 | |
476 | #endif |
477 | |