| 1 | /**************************************************************************** | 
| 2 | ** | 
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. | 
| 4 | ** Contact: https://www.qt.io/licensing/ | 
| 5 | ** | 
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. | 
| 7 | ** | 
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ | 
| 9 | ** Commercial License Usage | 
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in | 
| 11 | ** accordance with the commercial license agreement provided with the | 
| 12 | ** Software or, alternatively, in accordance with the terms contained in | 
| 13 | ** a written agreement between you and The Qt Company. For licensing terms | 
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further | 
| 15 | ** information use the contact form at https://www.qt.io/contact-us. | 
| 16 | ** | 
| 17 | ** GNU Lesser General Public License Usage | 
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser | 
| 19 | ** General Public License version 3 as published by the Free Software | 
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | 
| 21 | ** packaging of this file. Please review the following information to | 
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements | 
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | 
| 24 | ** | 
| 25 | ** GNU General Public License Usage | 
| 26 | ** Alternatively, this file may be used under the terms of the GNU | 
| 27 | ** General Public License version 2.0 or (at your option) the GNU General | 
| 28 | ** Public license version 3 or any later version approved by the KDE Free | 
| 29 | ** Qt Foundation. The licenses are as published by the Free Software | 
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | 
| 31 | ** included in the packaging of this file. Please review the following | 
| 32 | ** information to ensure the GNU General Public License requirements will | 
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | 
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. | 
| 35 | ** | 
| 36 | ** $QT_END_LICENSE$ | 
| 37 | ** | 
| 38 | ****************************************************************************/ | 
| 39 |  | 
| 40 | // | 
| 41 | //  W A R N I N G | 
| 42 | //  ------------- | 
| 43 | // | 
| 44 | // This file is not part of the Qt API.  It exists purely as an | 
| 45 | // implementation detail.  This header file may change from version to | 
| 46 | // version without notice, or even be removed. | 
| 47 | // | 
| 48 | // We mean it. | 
| 49 |  | 
| 50 | #ifndef Patternist_XSLTTokenizer_H | 
| 51 | #define Patternist_XSLTTokenizer_H | 
| 52 |  | 
| 53 | #include <QQueue> | 
| 54 | #include <QStack> | 
| 55 | #include <QUrl> | 
| 56 |  | 
| 57 | #include <private/qmaintainingreader_p.h> | 
| 58 | #include <private/qreportcontext_p.h> | 
| 59 | #include <private/qtokenizer_p.h> | 
| 60 | #include <private/qxslttokenlookup_p.h> | 
| 61 |  | 
| 62 | QT_BEGIN_NAMESPACE | 
| 63 |  | 
| 64 | namespace QPatternist | 
| 65 | { | 
| 66 |     /** | 
| 67 |      * @short A TokenSource which contains one Tokenizer::Token. | 
| 68 |      * | 
| 69 |      * The token and its source location are copied in at construction and never change afterwards (both members are const). | 
| 70 |      * One possible way to optimize this is to let SingleTokenContainer actually contain a list of tokens, such that | 
| 71 |      * XSLTTokenizer::queueToken() could append to that, instead of instantiating a SingleTokenContainer | 
| 72 |      * all the time. | 
| 73 |      * | 
| 74 |      * @author Frans Englich <frans.englich@nokia.com> | 
| 75 |      */ | 
| 76 |     class SingleTokenContainer : public TokenSource | 
| 77 |     { | 
| 78 |     public: | 
| 79 |         inline SingleTokenContainer(const Tokenizer::Token &token, | 
| 80 |                                     const XPATHLTYPE &location); | 
| 81 |  | 
| 82 |         virtual Tokenizer::Token nextToken(XPATHLTYPE *const sourceLocator); /* Presumably overrides the TokenSource interface - the base class is not visible in this header, confirm there. */ | 
| 83 |     private: | 
| 84 |         const Tokenizer::Token m_token; /* Copy of the token passed to the constructor. */ | 
| 85 |         const XPATHLTYPE       m_location; /* Copy of the source location passed to the constructor. */ | 
| 86 |         bool                   m_hasDelivered; /* Set to false by the constructor; presumably flipped once nextToken() has handed out m_token - confirm in the .cpp. */ | 
| 87 |     }; | 
| 88 |  | 
| 89 |     SingleTokenContainer::SingleTokenContainer(const Tokenizer::Token &token, | 
| 90 |                                                const XPATHLTYPE &location) | 
| 91 |         : m_token(token) /* Stored by value, so the caller's token need not outlive this object. */ | 
| 92 |         , m_location(location) /* Stored by value as well. */ | 
| 93 |         , m_hasDelivered(false) /* A freshly built container always starts out with this flag cleared. */ | 
| 94 |     { /* Intentionally empty: the initializer list above sets every member. */ | 
| 95 |     } | 
| 96 |  | 
| 97 |     /** | 
| 98 |      * @short Tokenizes XSL-T 2.0 documents. | 
| 99 |      * | 
| 100 |      * XSLTTokenizer takes in its constructor a pointer to a QIODevice which is | 
| 101 |      * supposed to contain an XSL-T document. XSLTTokenizer then rewrites that | 
| 102 |      * document into XQuery tokens delivered via nextToken(), which the regular | 
| 103 |      * XQuery parser then reads. Hence, the XSL-T language is rewritten into | 
| 104 |      * XQuery code, slightly extended to handle the features specific to | 
| 105 |      * XSL-T. | 
| 106 |      * | 
| 107 |      * @author Frans Englich <frans.englich@nokia.com> | 
| 108 |      */ | 
| 109 |     class XSLTTokenizer : public Tokenizer | 
| 110 |                         , private MaintainingReader<XSLTTokenLookup> | 
| 111 |     { | 
| 112 |     public: | 
| 113 |         /** | 
| 114 |          * XSLTTokenizer does not own @p queryDevice; ownership stays with the caller. | 
| 115 |          */ | 
| 116 |         XSLTTokenizer(QIODevice *const queryDevice, | 
| 117 |                       const QUrl &location, | 
| 118 |                       const ReportContext::Ptr &context, | 
| 119 |                       const NamePool::Ptr &np); | 
| 120 |  | 
| 121 |         virtual Token nextToken(XPATHLTYPE *const sourceLocator); | 
| 122 |  | 
| 123 |         /** | 
| 124 |          * For XSLT we don't need this mechanism, so we do nothing. | 
| 125 |          */ | 
| 126 |         virtual int commenceScanOnly(); | 
| 127 |  | 
| 128 |         /** | 
| 129 |          * For XSLT we don't need this mechanism, so we do nothing. | 
| 130 |          */ | 
| 131 |         virtual void resumeTokenizationFrom(const int position); | 
| 132 |  | 
| 133 |         virtual void setParserContext(const ParserContext::Ptr &parseInfo); | 
| 134 |  | 
| 135 |         virtual QUrl documentURI() const | 
| 136 |         { | 
| 137 |             return queryURI(); /* queryURI() is not declared in this class; presumably inherited from one of the bases - confirm. */ | 
| 138 |         } | 
| 139 |  | 
| 140 |     protected: | 
| 141 |         virtual bool isAnyAttributeAllowed() const; | 
| 142 |  | 
| 143 |     private: | 
| 144 |         inline void validateElement() const; | 
| 145 |  | 
| 146 |         XPATHLTYPE currentSourceLocator() const; | 
| 147 |  | 
| 148 |         enum State /* Element type of the m_state stack; pushState() takes one of these. */ | 
| 149 |         { | 
| 150 |             OutsideDocumentElement, | 
| 151 |             InsideStylesheetModule, | 
| 152 |             InsideSequenceConstructor | 
| 153 |         }; | 
| 154 |  | 
| 155 |         enum VariableType /* Passed to queueVariableDeclaration() to say which kind of variable is being declared. */ | 
| 156 |         { | 
| 157 |             FunctionParameter, | 
| 158 |             GlobalParameter, | 
| 159 |             TemplateParameter, | 
| 160 |             VariableDeclaration, | 
| 161 |             VariableInstruction, | 
| 162 |             WithParamVariable | 
| 163 |         }; | 
| 164 |  | 
| 165 |         void queueNamespaceDeclarations(TokenSource::Queue *const ts, | 
| 166 |                                         QStack<Token> *const target, | 
| 167 |                                         const bool isDeclaration = false); | 
| 168 |  | 
| 169 |         inline void queueToken(const Token &token, | 
| 170 |                                TokenSource::Queue *const ts); | 
| 171 |         void queueEmptySequence(TokenSource::Queue *const to); | 
| 172 |         void queueSequenceType(const QString &expr); | 
| 173 |         /** | 
| 174 |          * If @p emptynessAllowed is @c true, the @c select attribute may | 
| 175 |          * be empty even though there is no sequence constructor either. | 
| 176 |          */ | 
| 177 |         void queueSimpleContentConstructor(const ReportContext::ErrorCode code, | 
| 178 |                                            const bool emptynessAllowed, | 
| 179 |                                            TokenSource::Queue *const to, | 
| 180 |                                            const bool selectOnlyFirst = false); | 
| 181 |         /** | 
| 182 |          * Tokenizes and queues @p expr as if it were an attribute value | 
| 183 |          * template. | 
| 184 |          */ | 
| 185 |         void queueAVT(const QString &expr, | 
| 186 |                       TokenSource::Queue *const to); | 
| 187 |  | 
| 188 |         void hasWrittenExpression(bool &beacon); | 
| 189 |         void commencingExpression(bool &hasWrittenExpression, | 
| 190 |                                   TokenSource::Queue *const to); | 
| 191 |  | 
| 192 |         void outsideDocumentElement(); | 
| 193 |         void insideChoose(TokenSource::Queue *const to); | 
| 194 |         void insideFunction(); | 
| 195 |  | 
| 196 |         bool attributeYesNo(const QString &localName) const; | 
| 197 |  | 
| 198 |         /** | 
| 199 |          * Scans/skips @c xsl:fallback elements only. This applies to the | 
| 200 |          * children of @c xsl:sequence, for instance. | 
| 201 |          */ | 
| 202 |         void parseFallbacksOnly(); | 
| 203 |  | 
| 204 |         /** | 
| 205 |          * Returns @c true if the current element is either @c stylesheet | 
| 206 |          * or the synonym @c transform. | 
| 207 |          * | 
| 208 |          * This function assumes that m_reader is positioned at an element | 
| 209 |          * and that the namespace is XSL-T. | 
| 210 |          */ | 
| 211 |         bool isStylesheetElement() const; | 
| 212 |  | 
| 213 |         /** | 
| 214 |          * Returns @c true if the current element name is @p name. | 
| 215 |          * | 
| 216 |          * It is assumed that the namespace is XSL-T and that the current | 
| 217 |          * state in m_reader is either QXmlStreamReader::StartElement or | 
| 218 |          * QXmlStreamReader::EndElement. | 
| 219 |          */ | 
| 220 |         bool isElement(const NodeName &name) const; | 
| 221 |  | 
| 222 |         /** | 
| 223 |          * Queues a text constructor for @p chars, if @p chars is | 
| 224 |          * not empty. | 
| 225 |          */ | 
| 226 |         void queueTextConstructor(QString &chars, | 
| 227 |                                   bool &hasWrittenExpression, | 
| 228 |                                   TokenSource::Queue *const to); | 
| 229 |  | 
| 230 |         /** | 
| 231 |          * Presumably tokenizes the content of a stylesheet module, going by the name and the reference below - confirm in the .cpp. | 
| 232 |          * @see <a href="http://www.w3.org/TR/xslt20/#stylesheet-structure">XSL | 
| 233 |          * Transformations (XSLT) Version 2, 3.6 Stylesheet Element</a> | 
| 234 |          */ | 
| 235 |         void insideStylesheetModule(); | 
| 236 |         void insideTemplate(); | 
| 237 |  | 
| 238 |         /** | 
| 239 |          * Takes @p expr for an XPath expression, and pushes the necessary | 
| 240 |          * things for having it delivered as a stream of tokens, appropriate | 
| 241 |          * for Effective Boolean Value parsing. | 
| 242 |          */ | 
| 243 |         void queueExpression(const QString &expr, | 
| 244 |                              TokenSource::Queue *const to, | 
| 245 |                              const bool wrapWithParantheses = true); | 
| 246 |  | 
| 247 |         void skipBodyOfParam(const ReportContext::ErrorCode code); | 
| 248 |  | 
| 249 |         void queueParams(const NodeName parentName, | 
| 250 |                          TokenSource::Queue *const to); | 
| 251 |  | 
| 252 |         /** | 
| 253 |          * Used for @c xsl:apply-templates and @c xsl:call-template. | 
| 254 |          */ | 
| 255 |         void queueWithParams(const NodeName parentName, | 
| 256 |                              TokenSource::Queue *const to, | 
| 257 |                              const bool initialAdvance = true); | 
| 258 |  | 
| 259 |         /** | 
| 260 |          * Queues a variable declaration. @p variableType tells which kind | 
| 261 |          * of variable is being declared, see VariableType. (An earlier revision of this text described | 
| 262 |          * a boolean @c isInstruction parameter, which the signature does not have.) | 
| 263 |          */ | 
| 264 |         void queueVariableDeclaration(const VariableType variableType, | 
| 265 |                                       TokenSource::Queue *const to); | 
| 266 |  | 
| 267 |         /** | 
| 268 |          * Skips the current sub-tree. | 
| 269 |          * | 
| 270 |          * If text nodes that aren't strippable whitespace, or elements are | 
| 271 |          * encountered, @c true is returned, otherwise @c false. | 
| 272 |          * | 
| 273 |          * If @p exitOnContent is @c true, this function exits immediately | 
| 274 |          * if content is encountered for which it would return @c false. NOTE(review): by the paragraph above such content yields @c true, so "@c false" here looks like a slip - confirm against the implementation. | 
| 275 |          */ | 
| 276 |         bool skipSubTree(const bool exitOnContent = false); | 
| 277 |  | 
| 278 |         /** | 
| 279 |          * Queues the necessary tokens for the expression that is either | 
| 280 |          * supplied using a @c select attribute or a sequence constructor, | 
| 281 |          * while doing the necessary error handling for ensuring they are | 
| 282 |          * mutually exclusive. | 
| 283 |          * | 
| 284 |          * It is assumed that the current state of m_reader is | 
| 285 |          * QXmlStreamReader::StartElement, or that the attributes for the | 
| 286 |          * element are supplied through @p atts. This function advances m_reader | 
| 287 |          * up until the corresponding QXmlStreamReader::EndElement. | 
| 288 |          * | 
| 289 |          * If @p emptynessAllowed is @c false, the element must either have a | 
| 290 |          * sequence constructor or a @c select attribute. If @c true, both may | 
| 291 |          * be absent. | 
| 292 |          * | 
| 293 |          * Returns @c true if the queued expression was supplied through the | 
| 294 |          * @c select attribute, otherwise @c false. | 
| 295 |          */ | 
| 296 |         bool queueSelectOrSequenceConstructor(const ReportContext::ErrorCode code, | 
| 297 |                                               const bool emptynessAllowed, | 
| 298 |                                               TokenSource::Queue *const to, | 
| 299 |                                               const QXmlStreamAttributes *const atts = 0, | 
| 300 |                                               const bool queueEmptyOnEmpty = true); | 
| 301 |  | 
| 302 |         /** | 
| 303 |          * If @p initialAdvance is @c true, insideSequenceConstructor() will | 
| 304 |          * advance m_reader, otherwise it won't. Not doing so is useful | 
| 305 |          * when the caller is already inside a sequence constructor. | 
| 306 |          * | 
| 307 |          * Returns @c true if a sequence constructor was found and queued. | 
| 308 |          * Returns @c false if none was found, and the empty sequence was | 
| 309 |          * synthesized. | 
| 310 |          */ | 
| 311 |         bool insideSequenceConstructor(TokenSource::Queue *const to, | 
| 312 |                                        const bool initialAdvance = true, | 
| 313 |                                        const bool queueEmptyOnEmpty = true); | 
| 314 |  | 
| 315 |         bool insideSequenceConstructor(TokenSource::Queue *const to, | 
| 316 |                                        QStack<Token> &queueOnExit, | 
| 317 |                                        const bool initialAdvance = true, | 
| 318 |                                        const bool queueEmptyOnEmpty = true); | 
| 319 |  | 
| 320 |         void insideAttributeSet(); | 
| 321 |         void pushState(const State nextState); | 
| 322 |         void leaveState(); | 
| 323 |  | 
| 324 |         /** | 
| 325 |          * @short Handles @c xml:space and standard attributes. | 
| 326 |          * | 
| 327 |          * If @p isXSLTElement is @c true, the current element is an XSL-T | 
| 328 |          * element, as opposed to a Literal Result Element. | 
| 329 |          * | 
| 330 |          * handleStandardAttributes() must be called before validateElement(), | 
| 331 |          * because the former determines the version in use, and | 
| 332 |          * validateElement() depends on that. | 
| 333 |          * | 
| 334 |          * The core of this function can't be run many times because it pushes | 
| 335 |          * whitespace handling onto m_stripWhitespace. | 
| 336 |          * m_hasHandledStandardAttributes protects against this. (Neither member is declared in this class; presumably both live in a base class - confirm.) | 
| 337 |          * | 
| 338 |          * @see validateElement() | 
| 339 |          * @see <a href="http://www.w3.org/TR/xslt20/#standard-attributes">XSL | 
| 340 |          * Transformations (XSLT) Version 2.0, 3.5 Standard Attributes</a> | 
| 341 |          */ | 
| 342 |         void handleStandardAttributes(const bool isXSLTElement); | 
| 343 |  | 
| 344 |         /** | 
| 345 |          * @short Sends the tokens in @p source to @p destination. | 
| 346 |          */ | 
| 347 |         inline void queueOnExit(QStack<Token> &source, | 
| 348 |                                 TokenSource::Queue *const destination); | 
| 349 |  | 
| 350 |         /** | 
| 351 |          * Handles the @c type and @c validation attributes on instructions and | 
| 352 |          * literal result elements. | 
| 353 |          * | 
| 354 |          * @p isLRE should be true if the current element is not in the XSL-T | 
| 355 |          * namespace, that is if it's a Literal Result Element. | 
| 356 |          * | 
| 357 |          * @see <a href="http://www.w3.org/TR/xslt20/#validation">XSL | 
| 358 |          * Transformations (XSLT) Version 2.0, 19.2 Validation</a> | 
| 359 |          */ | 
| 360 |         void handleValidationAttributes(const bool isLRE) const; | 
| 361 |  | 
| 362 |         void unexpectedContent(const ReportContext::ErrorCode code = ReportContext::XTSE0010) const; | 
| 363 |  | 
| 364 |         void checkForParseError() const; | 
| 365 |  | 
| 366 |         inline void startStorageOfCurrent(TokenSource::Queue *const to); | 
| 367 |         inline void endStorageOfCurrent(TokenSource::Queue *const to); | 
| 368 |  | 
| 369 |         /** | 
| 370 |          * Checks that the attribute carrying the XSL-T version has a value in accordance with what | 
| 371 |          * is allowed and supported. NOTE(review): no parameter is called @c attribute; presumably the attribute is looked up in @p atts or on the current element - confirm. | 
| 372 |          */ | 
| 373 |         void handleXSLTVersion(TokenSource::Queue *const to, | 
| 374 |                                QStack<Token> *const queueOnExit, | 
| 375 |                                const bool isXSLTElement, | 
| 376 |                                const QXmlStreamAttributes *atts = 0, | 
| 377 |                                const bool generateCode = true, | 
| 378 |                                const bool setGlobalVersion = false); | 
| 379 |  | 
| 380 |         /** | 
| 381 |          * @short Generates code for reflecting @c xml:base attributes. | 
| 382 |          */ | 
| 383 |         void handleXMLBase(TokenSource::Queue *const to, | 
| 384 |                            QStack<Token> *const queueOnExit, | 
| 385 |                            const bool isInstruction = true, | 
| 386 |                            const QXmlStreamAttributes *atts = 0); | 
| 387 |  | 
| 388 |         /** | 
| 389 |          * Concatenates text nodes, ignores comments and processing | 
| 390 |          * instructions, and raises errors on everything else. | 
| 391 |          * | 
| 392 |          * Hence, similar to QXmlStreamReader::readElementText(), except | 
| 393 |          * for error handling. | 
| 394 |          */ | 
| 395 |         QString readElementText(); | 
| 396 |  | 
| 397 |         /** | 
| 398 |          * Tokenizes and validates @c xsl:sort statements, if any, until | 
| 399 |          * other content is encountered. NOTE(review): this text used to say the produced tokens are "returned in a list", | 
| 400 |          * but the function returns void; presumably they are queued on @p to - confirm in the .cpp. | 
| 401 |          * | 
| 402 |          * If @p oneSortRequired is @c true, at least one @c sort element must appear, | 
| 403 |          * otherwise an error is raised. | 
| 404 |          * | 
| 405 |          * If @p speciallyTreatWhitespace is @c true, whitespace will be treated as if it | 
| 406 |          * were one of the elements mentioned in step 4 in section 4.2 Stripping | 
| 407 |          * Whitespace from the Stylesheet. | 
| 408 |          */ | 
| 409 |         void queueSorting(const bool oneSortRequired, | 
| 410 |                           TokenSource::Queue *const to, | 
| 411 |                           const bool speciallyTreatWhitespace = false); | 
| 412 |  | 
| 413 |         static ElementDescription<XSLTTokenLookup>::Hash createElementDescriptions(); | 
| 414 |         static QHash<QString, int> createValidationAlternatives(); | 
| 415 |         static QSet<NodeName> createStandardAttributes(); | 
| 416 |  | 
| 417 |         /** | 
| 418 |          * Reads the attribute by name @p attributeName, and returns @c true if | 
| 419 |          * its value is @p isTrue, @c false if it is @p isFalse, and raises an | 
| 420 |          * error otherwise. | 
| 421 |          */ | 
| 422 |         bool readToggleAttribute(const QString &attributeName, | 
| 423 |                                  const QString &isTrue, | 
| 424 |                                  const QString &isFalse, | 
| 425 |                                  const QXmlStreamAttributes *const atts = 0) const; | 
| 426 |  | 
| 427 |         int readAlternativeAttribute(const QHash<QString, int> &alternatives, | 
| 428 |                                      const QXmlStreamAttribute &attr) const; | 
| 429 |  | 
| 430 |         /** | 
| 431 |          * Returns @c true if the current text node can be skipped without | 
| 432 |          * it leading to a validation error, with respect to whitespace. | 
| 433 |          */ | 
| 434 |         inline bool whitespaceToSkip() const; | 
| 435 |  | 
| 436 |         const QUrl                                  m_location; /* Presumably the @c location handed to the constructor - confirm in the .cpp. */ | 
| 437 |         const NamePool::Ptr                         m_namePool; /* Presumably the @c np handed to the constructor - confirm in the .cpp. */ | 
| 438 |         QStack<State>                               m_state; /* Stack of State values; presumably maintained by pushState() and leaveState(). */ | 
| 439 |         TokenSource::Queue                          m_tokenSource; | 
| 440 |  | 
| 441 |         enum ProcessMode /* Element type of the m_processingMode stack declared below. */ | 
| 442 |         { | 
| 443 |             BackwardsCompatible, | 
| 444 |             ForwardCompatible, | 
| 445 |             NormalProcessing | 
| 446 |         }; | 
| 447 |  | 
| 448 |         /** | 
| 449 |          * Whether we're processing in Forwards-Compatible or | 
| 450 |          * Backwards-Compatible mode. | 
| 451 |          * | 
| 452 |          * This is set by handleStandardAttributes(). | 
| 453 |          * | 
| 454 |          * ParserContext has similar information in | 
| 455 |          * ParserContext::isBackwardsCompat. A big distinction is that both the | 
| 456 |          * tokenizer and the parser buffer tokens and have positions disjoint | 
| 457 |          * to each other. E.g., the state the parser has when reducing into | 
| 458 |          * non-terminals, is different from the tokenizer's. | 
| 459 |          */ | 
| 460 |         QStack<ProcessMode>                         m_processingMode; | 
| 461 |  | 
| 462 |         /** | 
| 463 |          * Returns @c true if the current state in m_reader is in the XSLT | 
| 464 |          * namespace. It is assumed that the current state is an element. | 
| 465 |          */ | 
| 466 |         inline bool isXSLT() const; | 
| 467 |  | 
| 468 |         const QHash<QString, int>                   m_validationAlternatives; /* Same type as createValidationAlternatives() returns; presumably initialized from it - confirm. */ | 
| 469 |  | 
| 470 |         ParserContext::Ptr                          m_parseInfo; /* Presumably assigned by setParserContext() - confirm in the .cpp. */ | 
| 471 |     }; | 
| 472 | } | 
| 473 |  | 
| 474 | QT_END_NAMESPACE | 
| 475 |  | 
| 476 | #endif | 
| 477 |  |