| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | // |
| 41 | // W A R N I N G |
| 42 | // ------------- |
| 43 | // |
| 44 | // This file is not part of the Qt API. It exists purely as an |
| 45 | // implementation detail. This header file may change from version to |
| 46 | // version without notice, or even be removed. |
| 47 | // |
| 48 | // We mean it. |
| 49 | |
| 50 | #ifndef Patternist_XSLTTokenizer_H |
| 51 | #define Patternist_XSLTTokenizer_H |
| 52 | |
| 53 | #include <QQueue> |
| 54 | #include <QStack> |
| 55 | #include <QUrl> |
| 56 | |
| 57 | #include <private/qmaintainingreader_p.h> |
| 58 | #include <private/qreportcontext_p.h> |
| 59 | #include <private/qtokenizer_p.h> |
| 60 | #include <private/qxslttokenlookup_p.h> |
| 61 | |
| 62 | QT_BEGIN_NAMESPACE |
| 63 | |
| 64 | namespace QPatternist |
| 65 | { |
| 66 | /** |
| 67 | * @short A TokenSource which contains one Tokenizer::Token. |
| 68 | * |
| 69 | * One possible way to optimize this is to let SingleTokenContainer |
| 70 | * actually contain a list of tokens, such that XSLTTokenizer::queueToken() |
| 71 | * could append to that, instead of instantiating a SingleTokenContainer |
| 72 | * all the time. |
| 73 | * |
| 74 | * @author Frans Englich <frans.englich@nokia.com> |
| 75 | */ |
| 76 | class SingleTokenContainer : public TokenSource |
| 77 | { |
| 78 | public: |
| 79 | inline SingleTokenContainer(const Tokenizer::Token &token, |
| 80 | const XPATHLTYPE &location); |
| 81 | |
| 82 | virtual Tokenizer::Token nextToken(XPATHLTYPE *const sourceLocator); |
| 83 | private: |
| 84 | const Tokenizer::Token m_token; /**< Copy of the token passed to the constructor. */ |
| 85 | const XPATHLTYPE m_location; /**< Copy of the source location passed to the constructor. */ |
| 86 | bool m_hasDelivered; /**< Initialized to @c false by the constructor. Presumably set once nextToken() has handed out m_token -- TODO confirm in the implementation. */ |
| 87 | }; |
| 88 | |
| 89 | SingleTokenContainer::SingleTokenContainer(const Tokenizer::Token &token, |
| 90 | const XPATHLTYPE &location) |
| 91 | : m_token(token) /* keep a copy of the token */ |
| 92 | , m_location(location) /* keep a copy of its source location */ |
| 93 | , m_hasDelivered(false) /* always starts out false */ |
| 94 | { /* Intentionally empty: the initializer list above sets every data member. */ |
| 95 | } |
| 96 | |
| 97 | /** |
| 98 | * @short Tokenizes XSL-T 2.0 documents. |
| 99 | * |
| 100 | * XSLTTokenizer takes in its constructor a pointer to a QIODevice which is |
| 101 | * supposed to contain an XSL-T document. XSLTTokenizer then rewrites that |
| 102 | * document into XQuery tokens delivered via nextToken(), which the regular |
| 103 | * XQuery parser then reads. Hence, the XSL-T language is rewritten into |
| 104 | * XQuery code, slightly extended to handle the features specific to |
| 105 | * XSL-T. |
| 106 | * |
| 107 | * @author Frans Englich <frans.englich@nokia.com> |
| 108 | */ |
| 109 | class XSLTTokenizer : public Tokenizer |
| 110 | , private MaintainingReader<XSLTTokenLookup> |
| 111 | { |
| 112 | public: |
| 113 | /** |
| 114 | * XSLTTokenizer does not own @p queryDevice. |
| 115 | */ |
| 116 | XSLTTokenizer(QIODevice *const queryDevice, |
| 117 | const QUrl &location, |
| 118 | const ReportContext::Ptr &context, |
| 119 | const NamePool::Ptr &np); |
| 120 | |
| 121 | virtual Token nextToken(XPATHLTYPE *const sourceLocator); |
| 122 | |
| 123 | /** |
| 124 | * For XSLT we don't need this mechanism, so we do nothing. |
| 125 | */ |
| 126 | virtual int commenceScanOnly(); |
| 127 | |
| 128 | /** |
| 129 | * For XSLT we don't need this mechanism, so we do nothing. |
| 130 | */ |
| 131 | virtual void resumeTokenizationFrom(const int position); |
| 132 | |
| 133 | virtual void setParserContext(const ParserContext::Ptr &parseInfo); |
| 134 | |
| 135 | virtual QUrl documentURI() const |
| 136 | { |
| 137 | return queryURI(); |
| 138 | } |
| 139 | |
| 140 | protected: |
| 141 | virtual bool isAnyAttributeAllowed() const; |
| 142 | |
| 143 | private: |
| 144 | inline void validateElement() const; |
| 145 | |
| 146 | XPATHLTYPE currentSourceLocator() const; |
| 147 | |
| 148 | enum State |
| 149 | { |
| 150 | OutsideDocumentElement, |
| 151 | InsideStylesheetModule, |
| 152 | InsideSequenceConstructor |
| 153 | }; |
| 154 | |
| 155 | enum VariableType |
| 156 | { |
| 157 | FunctionParameter, |
| 158 | GlobalParameter, |
| 159 | TemplateParameter, |
| 160 | VariableDeclaration, |
| 161 | VariableInstruction, |
| 162 | WithParamVariable |
| 163 | }; |
| 164 | |
| 165 | void queueNamespaceDeclarations(TokenSource::Queue *const ts, |
| 166 | QStack<Token> *const target, |
| 167 | const bool isDeclaration = false); |
| 168 | |
| 169 | inline void queueToken(const Token &token, |
| 170 | TokenSource::Queue *const ts); |
| 171 | void queueEmptySequence(TokenSource::Queue *const to); |
| 172 | void queueSequenceType(const QString &expr); |
| 173 | /** |
| 174 | * If @p emptynessAllowed is @c true, the @c select attribute may |
| 175 | * be empty while there also is no sequence constructor. |
| 176 | */ |
| 177 | void queueSimpleContentConstructor(const ReportContext::ErrorCode code, |
| 178 | const bool emptynessAllowed, |
| 179 | TokenSource::Queue *const to, |
| 180 | const bool selectOnlyFirst = false); |
| 181 | /** |
| 182 | * Tokenizes and queues @p expr as if it were an attribute value |
| 183 | * template. |
| 184 | */ |
| 185 | void queueAVT(const QString &expr, |
| 186 | TokenSource::Queue *const to); |
| 187 | |
| 188 | void hasWrittenExpression(bool &beacon); |
| 189 | void commencingExpression(bool &hasWrittenExpression, |
| 190 | TokenSource::Queue *const to); |
| 191 | |
| 192 | void outsideDocumentElement(); |
| 193 | void insideChoose(TokenSource::Queue *const to); |
| 194 | void insideFunction(); |
| 195 | |
| 196 | bool attributeYesNo(const QString &localName) const; |
| 197 | |
| 198 | /** |
| 199 | * Scans/skips @c xsl:fallback elements only. This is the case of the |
| 200 | * children of @c xsl:sequence, for instance. |
| 201 | */ |
| 202 | void parseFallbacksOnly(); |
| 203 | |
| 204 | /** |
| 205 | * Returns true if the current element is either @c stylesheet |
| 206 | * or the synonym @c transform. |
| 207 | * |
| 208 | * This function assumes that m_reader is positioned at an element |
| 209 | * and that the namespace is XSL-T. |
| 210 | */ |
| 211 | bool isStylesheetElement() const; |
| 212 | |
| 213 | /** |
| 214 | * Returns true if the current element name is @p name. |
| 215 | * |
| 216 | * It is assumed that the namespace is XSL-T and that the current |
| 217 | * state in m_reader is either QXmlStreamReader::StartElement or |
| 218 | * QXmlStreamReader::EndElement. |
| 219 | */ |
| 220 | bool isElement(const NodeName &name) const; |
| 221 | |
| 222 | /** |
| 223 | * Queues a text constructor for @p chars, if @p chars is |
| 224 | * not empty. |
| 225 | */ |
| 226 | void queueTextConstructor(QString &chars, |
| 227 | bool &hasWrittenExpression, |
| 228 | TokenSource::Queue *const to); |
| 229 | |
| 230 | /** |
| 231 | * |
| 232 | * @see <a href="http://www.w3.org/TR/xslt20/#stylesheet-structure">XSL |
| 233 | * Transformations (XSLT) Version 2, 3.6 Stylesheet Element</a> |
| 234 | */ |
| 235 | void insideStylesheetModule(); |
| 236 | void insideTemplate(); |
| 237 | |
| 238 | /** |
| 239 | * Takes @p expr for an XPath expression, and pushes the necessary |
| 240 | * things for having it delivered as a stream of tokens, appropriate |
| 241 | * for Effective Boolean Value parsing. |
| 242 | */ |
| 243 | void queueExpression(const QString &expr, |
| 244 | TokenSource::Queue *const to, |
| 245 | const bool wrapWithParantheses = true); |
| 246 | |
| 247 | void skipBodyOfParam(const ReportContext::ErrorCode code); |
| 248 | |
| 249 | void queueParams(const NodeName parentName, |
| 250 | TokenSource::Queue *const to); |
| 251 | |
| 252 | /** |
| 253 | * Used for @c xsl:apply-templates and @c xsl:call-template. |
| 254 | */ |
| 255 | void queueWithParams(const NodeName parentName, |
| 256 | TokenSource::Queue *const to, |
| 257 | const bool initialAdvance = true); |
| 258 | |
| 259 | /** |
| 260 | * Queues an @c xsl:variable declaration. @p variableType selects the kind |
| 261 | * of variable, see VariableType. NOTE(review): presumably VariableInstruction |
| 262 | * means an instruction and VariableDeclaration a top-level declaration element -- confirm. |
| 263 | */ |
| 264 | void queueVariableDeclaration(const VariableType variableType, |
| 265 | TokenSource::Queue *const to); |
| 266 | |
| 267 | /** |
| 268 | * Skips the current sub-tree. |
| 269 | * |
| 270 | * If text nodes that aren't strippable whitespace, or elements are |
| 271 | * encountered, @c true is returned, otherwise @c false. |
| 272 | * |
| 273 | * If @p exitOnContent is @c true, this function exits immediately |
| 274 | * if content is encountered for which it would return @c false. |
| 275 | */ |
| 276 | bool skipSubTree(const bool exitOnContent = false); |
| 277 | |
| 278 | /** |
| 279 | * Queues the necessary tokens for the expression that is either |
| 280 | * supplied using a @c select attribute or a sequence constructor, |
| 281 | * while doing the necessary error handling for ensuring they are |
| 282 | * mutually exclusive. |
| 283 | * |
| 284 | * It is assumed that the current state of m_reader is |
| 285 | * QXmlStreamReader::StartElement, or that the attributes for the |
| 286 | * element are supplied through @p atts. This function advances m_reader |
| 287 | * up until the corresponding QXmlStreamReader::EndElement. |
| 288 | * |
| 289 | * If @p emptynessAllowed is @c false, the element must either have a |
| 290 | * sequence constructor or a @c select attribute. If @c true, both may |
| 291 | * be absent. |
| 292 | * |
| 293 | * Returns @c true if the queued expression was supplied through the |
| 294 | * @c select attribute, otherwise @c false. |
| 295 | */ |
| 296 | bool queueSelectOrSequenceConstructor(const ReportContext::ErrorCode code, |
| 297 | const bool emptynessAllowed, |
| 298 | TokenSource::Queue *const to, |
| 299 | const QXmlStreamAttributes *const atts = 0, |
| 300 | const bool queueEmptyOnEmpty = true); |
| 301 | |
| 302 | /** |
| 303 | * If @p initialAdvance is @c true, insideSequenceConstructor() will |
| 304 | * advance m_reader, otherwise it won't. Not doing so is useful |
| 305 | * when the caller is already inside a sequence constructor. |
| 306 | * |
| 307 | * Returns @c true if a sequence constructor was found and queued. |
| 308 | * Returns @c false if none was found, and the empty sequence was |
| 309 | * synthesized. |
| 310 | */ |
| 311 | bool insideSequenceConstructor(TokenSource::Queue *const to, |
| 312 | const bool initialAdvance = true, |
| 313 | const bool queueEmptyOnEmpty = true); |
| 314 | |
| 315 | bool insideSequenceConstructor(TokenSource::Queue *const to, |
| 316 | QStack<Token> &queueOnExit, |
| 317 | const bool initialAdvance = true, |
| 318 | const bool queueEmptyOnEmpty = true); |
| 319 | |
| 320 | void insideAttributeSet(); |
| 321 | void pushState(const State nextState); |
| 322 | void leaveState(); |
| 323 | |
| 324 | /** |
| 325 | * @short Handles @c xml:space and standard attributes. |
| 326 | * |
| 327 | * If @p isXSLTElement is @c true, the current element is an XSL-T |
| 328 | * element, as opposed to a Literal Result Element. |
| 329 | * |
| 330 | * handleStandardAttributes() must be called before validateElement(), |
| 331 | * because the former determines the version in use, and |
| 332 | * validateElement() depends on that. |
| 333 | * |
| 334 | * The core of this function can't be run many times because it pushes |
| 335 | * whitespace handling onto m_stripWhitespace. |
| 336 | * m_hasHandledStandardAttributes guards against this. |
| 337 | * |
| 338 | * @see validateElement() |
| 339 | * @see <a href="http://www.w3.org/TR/xslt20/#standard-attributes">XSL |
| 340 | * Transformations (XSLT) Version 2.0, 3.5 Standard Attributes</a> |
| 341 | */ |
| 342 | void handleStandardAttributes(const bool isXSLTElement); |
| 343 | |
| 344 | /** |
| 345 | * @short Sends the tokens in @p source to @p destination. |
| 346 | */ |
| 347 | inline void queueOnExit(QStack<Token> &source, |
| 348 | TokenSource::Queue *const destination); |
| 349 | |
| 350 | /** |
| 351 | * Handles the @c type and @c validation attribute on instructions and |
| 352 | * literal result elements. |
| 353 | * |
| 354 | * @p isLRE should be true if the current element is not in the XSL-T |
| 355 | * namespace, that is if it's a Literal Result Element. |
| 356 | * |
| 357 | * @see <a href="http://www.w3.org/TR/xslt20/#validation">XSL |
| 358 | * Transformations (XSLT) Version 2.0, 19.2 Validation</a> |
| 359 | */ |
| 360 | void handleValidationAttributes(const bool isLRE) const; |
| 361 | |
| 362 | void unexpectedContent(const ReportContext::ErrorCode code = ReportContext::XTSE0010) const; |
| 363 | |
| 364 | void checkForParseError() const; |
| 365 | |
| 366 | inline void startStorageOfCurrent(TokenSource::Queue *const to); |
| 367 | inline void endStorageOfCurrent(TokenSource::Queue *const to); |
| 368 | |
| 369 | /** |
| 370 | * Checks that the version attribute (presumably read from @p atts when given, otherwise |
| 371 | * from the current element -- confirm) has a value in accordance with what is allowed and supported. |
| 372 | */ |
| 373 | void handleXSLTVersion(TokenSource::Queue *const to, |
| 374 | QStack<Token> *const queueOnExit, |
| 375 | const bool isXSLTElement, |
| 376 | const QXmlStreamAttributes *atts = 0, |
| 377 | const bool generateCode = true, |
| 378 | const bool setGlobalVersion = false); |
| 379 | |
| 380 | /** |
| 381 | * @short Generates code for reflecting @c xml:base attributes. |
| 382 | */ |
| 383 | void handleXMLBase(TokenSource::Queue *const to, |
| 384 | QStack<Token> *const queueOnExit, |
| 385 | const bool isInstruction = true, |
| 386 | const QXmlStreamAttributes *atts = 0); |
| 387 | |
| 388 | /** |
| 389 | * Concatenates text nodes, ignores comments and processing |
| 390 | * instructions, and raises errors on everything else. |
| 391 | * |
| 392 | * Hence, similar to QXmlStreamReader::readElementText(), except |
| 393 | * for error handling. |
| 394 | */ |
| 395 | QString readElementText(); |
| 396 | |
| 397 | /** |
| 398 | * Tokenizes and validates @c xsl:sort elements, if any, until |
| 399 | * other content is encountered. The produced tokens are presumably |
| 400 | * queued onto @p to (the function itself returns nothing) -- TODO confirm. |
| 401 | * |
| 402 | * If @p oneSortRequired is @c true, at least one @c sort element must appear, |
| 403 | * otherwise an error is raised. |
| 404 | * |
| 405 | * If @p speciallyTreatWhitespace is @c true, whitespace will be treated as if it |
| 406 | * was one of the elements mentioned in step 4 in section 4.2 Stripping |
| 407 | * Whitespace from the Stylesheet. |
| 408 | */ |
| 409 | void queueSorting(const bool oneSortRequired, |
| 410 | TokenSource::Queue *const to, |
| 411 | const bool speciallyTreatWhitespace = false); |
| 412 | |
| 413 | static ElementDescription<XSLTTokenLookup>::Hash createElementDescriptions(); |
| 414 | static QHash<QString, int> createValidationAlternatives(); |
| 415 | static QSet<NodeName> createStandardAttributes(); |
| 416 | |
| 417 | /** |
| 418 | * Reads the attribute by name @p attributeName, and returns @c true if |
| 419 | * its value is @p isTrue, @c false if it is @p isFalse, and raises an |
| 420 | * error otherwise. |
| 421 | */ |
| 422 | bool readToggleAttribute(const QString &attributeName, |
| 423 | const QString &isTrue, |
| 424 | const QString &isFalse, |
| 425 | const QXmlStreamAttributes *const atts = 0) const; |
| 426 | |
| 427 | int readAlternativeAttribute(const QHash<QString, int> &alternatives, |
| 428 | const QXmlStreamAttribute &attr) const; |
| 429 | |
| 430 | /** |
| 431 | * Returns @c true if the current text node can be skipped without |
| 432 | * it leading to a validation error, with respect to whitespace. |
| 433 | */ |
| 434 | inline bool whitespaceToSkip() const; |
| 435 | |
| 436 | const QUrl m_location; |
| 437 | const NamePool::Ptr m_namePool; |
| 438 | QStack<State> m_state; /**< Stack of State values; presumably maintained through pushState() and leaveState() -- confirm. */ |
| 439 | TokenSource::Queue m_tokenSource; |
| 440 | |
| 441 | enum ProcessMode |
| 442 | { |
| 443 | BackwardsCompatible, |
| 444 | ForwardCompatible, |
| 445 | NormalProcessing |
| 446 | }; |
| 447 | |
| 448 | /** |
| 449 | * Whether we're processing in Forwards-Compatible or |
| 450 | * Backwards-Compatible mode. |
| 451 | * |
| 452 | * This is set by handleStandardAttributes(). |
| 453 | * |
| 454 | * ParserContext has similar information in |
| 455 | * ParserContext::isBackwardsCompat. A big distinction is that both the |
| 456 | * tokenizer and the parser buffer tokens and have positions disjoint |
| 457 | * to each other. E.g., the state the parser has when reducing into |
| 458 | * non-terminals, is different from the tokenizer's. |
| 459 | */ |
| 460 | QStack<ProcessMode> m_processingMode; |
| 461 | |
| 462 | /** |
| 463 | * Returns @c true if the current state in m_reader is in the XSLT |
| 464 | * namespace. It is assumed that the current state is an element. |
| 465 | */ |
| 466 | inline bool isXSLT() const; |
| 467 | |
| 468 | const QHash<QString, int> m_validationAlternatives; /**< Same type as returned by createValidationAlternatives(); presumably initialized from it -- confirm. */ |
| 469 | |
| 470 | ParserContext::Ptr m_parseInfo; /**< Presumably the context handed to setParserContext() -- confirm. */ |
| 471 | }; |
| 472 | } |
| 473 | |
| 474 | QT_END_NAMESPACE |
| 475 | |
| 476 | #endif |
| 477 | |