1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtXmlPatterns module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40//
41// W A R N I N G
42// -------------
43//
44// This file is not part of the Qt API. It exists purely as an
45// implementation detail. This header file may change from version to
46// version without notice, or even be removed.
47//
48// We mean it.
49
50#ifndef Patternist_XSLTTokenizer_H
51#define Patternist_XSLTTokenizer_H
52
53#include <QQueue>
54#include <QStack>
55#include <QUrl>
56
57#include <private/qmaintainingreader_p.h>
58#include <private/qreportcontext_p.h>
59#include <private/qtokenizer_p.h>
60#include <private/qxslttokenlookup_p.h>
61
62QT_BEGIN_NAMESPACE
63
64namespace QPatternist
65{
66 /**
67 * @short A TokenSource which contains one Tokenizer::Token.
68 *
69 * One possible way to optimize this is to let SingleTokenContainer
70 * actually contain a list of tokens, such that XSLTTokenizer::queueToken()
71 * could append to that, instead of instansiating a SingleTokenContainer
72 * all the time.
73 *
74 * @author Frans Englich <frans.englich@nokia.com>
75 */
76 class SingleTokenContainer : public TokenSource
77 {
78 public:
79 inline SingleTokenContainer(const Tokenizer::Token &token,
80 const XPATHLTYPE &location);
81
82 virtual Tokenizer::Token nextToken(XPATHLTYPE *const sourceLocator);
83 private:
84 const Tokenizer::Token m_token;
85 const XPATHLTYPE m_location;
86 bool m_hasDelivered;
87 };
88
89 SingleTokenContainer::SingleTokenContainer(const Tokenizer::Token &token,
90 const XPATHLTYPE &location)
91 : m_token(token)
92 , m_location(location)
93 , m_hasDelivered(false)
94 {
95 }
96
97 /**
98 * @short Tokenizes XSL-T 2.0 documents.
99 *
100 * XSLTTokenizer takes in its constructor a pointer to a QIODevice which is
101 * supposed to contain an XSL-T document. XSLTTokenizer then rewrites that
102 * document into XQuery tokens delivered via nextToken(), which the regular
103 * XQuery parser then reads. Hence, the XSL-T language is rewritten into
104 * XQuery code, slightly extended to handle the featuress specific to
105 * XSL-T.
106 *
107 * @author Frans Englich <frans.englich@nokia.com>
108 */
109 class XSLTTokenizer : public Tokenizer
110 , private MaintainingReader<XSLTTokenLookup>
111 {
112 public:
113 /**
114 * XSLTTokenizer do not own @p queryDevice.
115 */
116 XSLTTokenizer(QIODevice *const queryDevice,
117 const QUrl &location,
118 const ReportContext::Ptr &context,
119 const NamePool::Ptr &np);
120
121 virtual Token nextToken(XPATHLTYPE *const sourceLocator);
122
123 /**
124 * For XSLT we don't need this mechanism, so we do nothing.
125 */
126 virtual int commenceScanOnly();
127
128 /**
129 * For XSLT we don't need this mechanism, so we do nothing.
130 */
131 virtual void resumeTokenizationFrom(const int position);
132
133 virtual void setParserContext(const ParserContext::Ptr &parseInfo);
134
135 virtual QUrl documentURI() const
136 {
137 return queryURI();
138 }
139
140 protected:
141 virtual bool isAnyAttributeAllowed() const;
142
143 private:
144 inline void validateElement() const;
145
146 XPATHLTYPE currentSourceLocator() const;
147
148 enum State
149 {
150 OutsideDocumentElement,
151 InsideStylesheetModule,
152 InsideSequenceConstructor
153 };
154
155 enum VariableType
156 {
157 FunctionParameter,
158 GlobalParameter,
159 TemplateParameter,
160 VariableDeclaration,
161 VariableInstruction,
162 WithParamVariable
163 };
164
165 void queueNamespaceDeclarations(TokenSource::Queue *const ts,
166 QStack<Token> *const target,
167 const bool isDeclaration = false);
168
169 inline void queueToken(const Token &token,
170 TokenSource::Queue *const ts);
171 void queueEmptySequence(TokenSource::Queue *const to);
172 void queueSequenceType(const QString &expr);
173 /**
174 * If @p emptynessAllowed is @c true, the @c select attribute may
175 * be empty while there also is no sequence constructor.
176 */
177 void queueSimpleContentConstructor(const ReportContext::ErrorCode code,
178 const bool emptynessAllowed,
179 TokenSource::Queue *const to,
180 const bool selectOnlyFirst = false);
181 /**
182 * Tokenizes and queues @p expr as if it was an attribute value
183 * template.
184 */
185 void queueAVT(const QString &expr,
186 TokenSource::Queue *const to);
187
188 void hasWrittenExpression(bool &beacon);
189 void commencingExpression(bool &hasWrittenExpression,
190 TokenSource::Queue *const to);
191
192 void outsideDocumentElement();
193 void insideChoose(TokenSource::Queue *const to);
194 void insideFunction();
195
196 bool attributeYesNo(const QString &localName) const;
197
198 /**
199 * Scans/skips @c xsl:fallback elements only. This is the case of the
200 * children of @c xsl:sequence, for instance.
201 */
202 void parseFallbacksOnly();
203
204 /**
205 * Returns true if the current element is either @c stylesheet
206 * or the synonym @c transform.
207 *
208 * This function assumes that m_reader is positioned at an element
209 * and that the namespace is XSL-T.
210 */
211 bool isStylesheetElement() const;
212
213 /**
214 * Returns true if the current element name is @p name.
215 *
216 * It is assumed that the namespace is XSL-T and that the current
217 * state in m_reader is either QXmlStreamReader::StartElement or
218 * QXmlStreamReader::EndElement.
219 */
220 bool isElement(const NodeName &name) const;
221
222 /**
223 * Queues a text constructor for @p chars, if @p chars is
224 * not empty.
225 */
226 void queueTextConstructor(QString &chars,
227 bool &hasWrittenExpression,
228 TokenSource::Queue *const to);
229
230 /**
231 *
232 * @see <a href="http://www.w3.org/TR/xslt20/#stylesheet-structure">XSL
233 * Transformations (XSLT) Version 2, 3.6 Stylesheet Element</a>
234 */
235 void insideStylesheetModule();
236 void insideTemplate();
237
238 /**
239 * Takes @p expr for an XPath expression, and pushes the necessary
240 * things for having it delivered as a stream of token, appropriate
241 * for Effective Boolean Value parsing.
242 */
243 void queueExpression(const QString &expr,
244 TokenSource::Queue *const to,
245 const bool wrapWithParantheses = true);
246
247 void skipBodyOfParam(const ReportContext::ErrorCode code);
248
249 void queueParams(const NodeName parentName,
250 TokenSource::Queue *const to);
251
252 /**
253 * Used for @c xsl:apply-templates and @c xsl:call-templates.
254 */
255 void queueWithParams(const NodeName parentName,
256 TokenSource::Queue *const to,
257 const bool initialAdvance = true);
258
259 /**
260 * Queues an @c xsl:variable declaration. If @p isInstruction is @c
261 * true, it is assumed to be a an instruction, otherwise a top-level
262 * declaration element.
263 */
264 void queueVariableDeclaration(const VariableType variableType,
265 TokenSource::Queue *const to);
266
267 /**
268 * Skips the current sub-tree.
269 *
270 * If text nodes that aren't strippable whitespace, or elements are
271 * encountered, @c true is returned, otherwise @c false.
272 *
273 * If @p exitOnContent is @c true, this function exits immediately
274 * if content is encountered for which it would return @c false.
275 */
276 bool skipSubTree(const bool exitOnContent = false);
277
278 /**
279 * Queues the necessary tokens for the expression that is either
280 * supplied using a @c select attribute or a sequence constructor,
281 * while doing the necessary error handling for ensuring they are
282 * mutually exclusive.
283 *
284 * It is assumed that the current state of m_reader is
285 * QXmlStreamReader::StartElement, or that the attributes for the
286 * element is supplied through @p atts. This function advances m_reader
287 * up until the corresponding QXmlStreamReader::EndElement.
288 *
289 * If @p emptynessAllowed is @c false, the element must either have a
290 * sequence constructor or a @c select attribute. If @c true, both may
291 * be absent.
292 *
293 * Returns @c true if the queued expression was supplied through the
294 * @c select attribute otherwise @c false.
295 */
296 bool queueSelectOrSequenceConstructor(const ReportContext::ErrorCode code,
297 const bool emptynessAllowed,
298 TokenSource::Queue *const to,
299 const QXmlStreamAttributes *const atts = 0,
300 const bool queueEmptyOnEmpty = true);
301
302 /**
303 * If @p initialAdvance is @c true, insideSequenceConstructor() will
304 * advance m_reader, otherwise it won't. Not doing so is useful
305 * when the caller is already inside a sequence constructor.
306 *
307 * Returns @c true if a sequence constructor was found and queued.
308 * Returns @c false if none was found, and the empty sequence was
309 * synthesized.
310 */
311 bool insideSequenceConstructor(TokenSource::Queue *const to,
312 const bool initialAdvance = true,
313 const bool queueEmptyOnEmpty = true);
314
315 bool insideSequenceConstructor(TokenSource::Queue *const to,
316 QStack<Token> &queueOnExit,
317 const bool initialAdvance = true,
318 const bool queueEmptyOnEmpty = true);
319
320 void insideAttributeSet();
321 void pushState(const State nextState);
322 void leaveState();
323
324 /**
325 * @short Handles @c xml:space and standard attributes.
326 *
327 * If @p isXSLTElement is @c true, the current element is an XSL-T
328 * element, as opposed to a Literal Result Element.
329 *
330 * handleStandardAttributes() must be called before validateElement(),
331 * because the former determines the version in use, and
332 * validateElement() depends on that.
333 *
334 * The core of this function can't be run many times because it pushes
335 * whitespace handling onto m_stripWhitespace.
336 * m_hasHandledStandardAttributes protects helping against this.
337 *
338 * @see validateElement()
339 * @see <a href="http://www.w3.org/TR/xslt20/#standard-attributes">XSL
340 * Transformations (XSLT) Version 2.0, 3.5 Standard Attributes</a>
341 */
342 void handleStandardAttributes(const bool isXSLTElement);
343
344 /**
345 * @short Sends the tokens in @p source to @p destination.
346 */
347 inline void queueOnExit(QStack<Token> &source,
348 TokenSource::Queue *const destination);
349
350 /**
351 * Handles the @c type and @c validation attribute on instructions and
352 * literal result elements.
353 *
354 * @p isLRE should be true if the current element is not in the XSL-T
355 * namespace, that is if it's a Literal Result Element.
356 *
357 * @see <a href="http://www.w3.org/TR/xslt20/#validation">XSL
358 * Transformations (XSLT) Version 2.0, 19.2 Validation</a>
359 */
360 void handleValidationAttributes(const bool isLRE) const;
361
362 void unexpectedContent(const ReportContext::ErrorCode code = ReportContext::XTSE0010) const;
363
364 void checkForParseError() const;
365
366 inline void startStorageOfCurrent(TokenSource::Queue *const to);
367 inline void endStorageOfCurrent(TokenSource::Queue *const to);
368
369 /**
370 * Checks that @p attribute has a value in accordance with what
371 * is allowed and supported.
372 */
373 void handleXSLTVersion(TokenSource::Queue *const to,
374 QStack<Token> *const queueOnExit,
375 const bool isXSLTElement,
376 const QXmlStreamAttributes *atts = 0,
377 const bool generateCode = true,
378 const bool setGlobalVersion = false);
379
380 /**
381 * @short Generates code for reflecting @c xml:base attributes.
382 */
383 void handleXMLBase(TokenSource::Queue *const to,
384 QStack<Token> *const queueOnExit,
385 const bool isInstruction = true,
386 const QXmlStreamAttributes *atts = 0);
387
388 /**
389 * Concatenates text nodes, ignores comments and processing
390 * instructions, and raises errors on everything else.
391 *
392 * Hence, similar to QXmlStreamReader::readElementText(), except
393 * for error handling.
394 */
395 QString readElementText();
396
397 /**
398 * Tokenizes and validate xsl:sort statements, if any, until
399 * other content is encountered. The produced tokens are returned
400 * in a list.
401 *
402 * If @p oneSortRequired, at least one @c sort element must appear,
403 * otherwise an error is raised.
404 *
405 * If @p speciallyTreatWhitespace whitespace will be treated as if it
406 * was one of the elements mentioned in step 4 in section 4.2 Stripping
407 * Whitespace from the Stylesheet.
408 */
409 void queueSorting(const bool oneSortRequired,
410 TokenSource::Queue *const to,
411 const bool speciallyTreatWhitespace = false);
412
413 static ElementDescription<XSLTTokenLookup>::Hash createElementDescriptions();
414 static QHash<QString, int> createValidationAlternatives();
415 static QSet<NodeName> createStandardAttributes();
416
417 /**
418 * Reads the attribute by name @p attributeName, and returns @c true if
419 * its value is @p isTrue, @c false if it is @p isFalse, and raise an
420 * error otherwise.
421 */
422 bool readToggleAttribute(const QString &attributeName,
423 const QString &isTrue,
424 const QString &isFalse,
425 const QXmlStreamAttributes *const atts = 0) const;
426
427 int readAlternativeAttribute(const QHash<QString, int> &alternatives,
428 const QXmlStreamAttribute &attr) const;
429
430 /**
431 * Returns @c true if the current text node can be skipped without
432 * it leading to a validation error, with respect to whitespace.
433 */
434 inline bool whitespaceToSkip() const;
435
436 const QUrl m_location;
437 const NamePool::Ptr m_namePool;
438 QStack<State> m_state;
439 TokenSource::Queue m_tokenSource;
440
441 enum ProcessMode
442 {
443 BackwardsCompatible,
444 ForwardCompatible,
445 NormalProcessing
446 };
447
448 /**
449 * Whether we're processing in Forwards-Compatible or
450 * Backwards-Compatible mode.
451 *
452 * This is set by handleStandardAttributes().
453 *
454 * ParserContext have similar information in
455 * ParserContext::isBackwardsCompat. A big distinction is that both the
456 * tokenizer and the parser buffer tokens and have positions disjoint
457 * to each other. E.g, the state the parser has when reducing into
458 * non-terminals, is different from the tokenizer's.
459 */
460 QStack<ProcessMode> m_processingMode;
461
462 /**
463 * Returns @c true if the current state in m_reader is in the XSLT
464 * namespace. It is assumed that the current state is an element.
465 */
466 inline bool isXSLT() const;
467
468 const QHash<QString, int> m_validationAlternatives;
469
470 ParserContext::Ptr m_parseInfo;
471 };
472}
473
474QT_END_NAMESPACE
475
476#endif
477

source code of qtxmlpatterns/src/xmlpatterns/parser/qxslttokenizer_p.h