| 1 | /**************************************************************************** | 
| 2 | ** | 
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. | 
| 4 | ** Contact: https://www.qt.io/licensing/ | 
| 5 | ** | 
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. | 
| 7 | ** | 
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ | 
| 9 | ** Commercial License Usage | 
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in | 
| 11 | ** accordance with the commercial license agreement provided with the | 
| 12 | ** Software or, alternatively, in accordance with the terms contained in | 
| 13 | ** a written agreement between you and The Qt Company. For licensing terms | 
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further | 
| 15 | ** information use the contact form at https://www.qt.io/contact-us. | 
| 16 | ** | 
| 17 | ** GNU Lesser General Public License Usage | 
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser | 
| 19 | ** General Public License version 3 as published by the Free Software | 
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | 
| 21 | ** packaging of this file. Please review the following information to | 
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements | 
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | 
| 24 | ** | 
| 25 | ** GNU General Public License Usage | 
| 26 | ** Alternatively, this file may be used under the terms of the GNU | 
| 27 | ** General Public License version 2.0 or (at your option) the GNU General | 
| 28 | ** Public license version 3 or any later version approved by the KDE Free | 
| 29 | ** Qt Foundation. The licenses are as published by the Free Software | 
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | 
| 31 | ** included in the packaging of this file. Please review the following | 
| 32 | ** information to ensure the GNU General Public License requirements will | 
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | 
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. | 
| 35 | ** | 
| 36 | ** $QT_END_LICENSE$ | 
| 37 | ** | 
| 38 | ****************************************************************************/ | 
| 39 |  | 
| 40 | // | 
| 41 | //  W A R N I N G | 
| 42 | //  ------------- | 
| 43 | // | 
| 44 | // This file is not part of the Qt API.  It exists purely as an | 
| 45 | // implementation detail.  This header file may change from version to | 
| 46 | // version without notice, or even be removed. | 
| 47 | // | 
| 48 | // We mean it. | 
| 49 |  | 
| 50 | #ifndef Patternist_CompressedWhitespace_H | 
| 51 | #define Patternist_CompressedWhitespace_H | 
| 52 |  | 
| 53 | #include <QtGlobal> | 
| 54 |  | 
| 55 | QT_BEGIN_NAMESPACE | 
| 56 |  | 
| 57 | class QChar; | 
| 58 | class QString; | 
| 59 | class QStringRef; | 
| 60 |  | 
| 61 | namespace QPatternist | 
| 62 | { | 
| 63 |     /** | 
| 64 |      * @short A compression facility for whitespace nodes. | 
| 65 |      * | 
| 66 |      * CompressedWhitespace compresses and decompresses strings that consist of | 
| 67 |      * whitespace only, and does so with a scheme that is designed to do this | 
| 68 |      * specialized task in an efficient way. The approach is simple: each | 
| 69 |      * sequence of equal whitespace in the input gets coded into one byte, | 
| 70 |      * where the first two bits signal the type, CharIdentifier, and the | 
| 71 |      * remaining six bits are the count. | 
| 72 |      * | 
| 73 |      * For instance, this scheme manages to compress a sequence of spaces | 
| 74 |      * followed by a new line into 16 bits (one QChar), and QString stores | 
| 75 |      * strings of one QChar quite efficiently, by avoiding a heap allocation. | 
| 76 |      * | 
| 77 |      * There is no way to tell whether a QString is compressed or not. | 
| 78 |      * | 
| 79 |      * The compression scheme originates from Saxon, by Michael Kay. | 
| 80 |      * | 
| 81 |      * @author Frans Englich <frans.englich@nokia.com> | 
| 82 |      */ | 
| 83 |     class Q_AUTOTEST_EXPORT CompressedWhitespace | 
| 84 |     { | 
| 85 |         public: | 
| 86 |             /** | 
| 87 |              * @short Compresses @p input into a compressed format, returned | 
| 88 |              * as a QString. | 
| 89 |              * | 
| 90 |              * The caller guarantees that @p input is not empty | 
| 91 |              * and consists only of whitespace. | 
| 92 |              * | 
| 93 |              * The returned format is opaque. There is no way to find out | 
| 94 |              * whether a QString contains compressed data or not. | 
| 95 |              * | 
| 96 |              * @see decompress() | 
| 97 |              */ | 
| 98 |             static QString compress(const QStringRef &input); | 
| 99 |  | 
| 100 |             /** | 
| 101 |              * @short Decompresses @p input into a usual QString. | 
| 102 |              * | 
| 103 |              * @p input must be a QString as returned by compress(). | 
| 104 |              * | 
| 105 |              * @see compress() | 
| 106 |              */ | 
| 107 |             static QString decompress(const QString &input); | 
| 108 |  | 
| 109 |         private: | 
| 110 |             /** | 
| 111 |              * The two upper bits communicate which whitespace character is encoded. | 
| 112 |              */ | 
| 113 |             enum CharIdentifier | 
| 114 |             { | 
| 115 |                 Space   = 0x0, /* Binary: 00000000 */ | 
| 116 |  | 
| 117 |                 /** | 
| 118 |                  * 0xD, \\r | 
| 119 |                  * | 
| 120 |                  * Binary: 10000000 | 
| 121 |                  */ | 
| 122 |                 CR      = 0x80, | 
| 123 |  | 
| 124 |                 /** | 
| 125 |                  * 0xA, \\n | 
| 126 |                  * | 
| 127 |                  * Binary: 01000000 | 
| 128 |                  */ | 
| 129 |                 LF      = 0x40, | 
| 130 |  | 
| 131 |                 /** | 
| 132 |                  * 0x9, \\t -- Binary: 11000000 | 
| 133 |                  */ | 
| 134 |                 Tab     = 0xC0 | 
| 135 |             }; | 
| 136 |  | 
| 137 |             enum Constants | 
| 138 |             { | 
| 139 |                 /* We can at maximum store this many consecutive characters | 
| 140 |                  * of one type. We use 6 bits for the count, hence 63. */ | 
| 141 |                 MaxCharCount = (1 << 6) - 1, | 
| 142 |  | 
| 143 |                 /** | 
| 144 |                  * Binary: 11111111 | 
| 145 |                  */ | 
| 146 |                 Lower8Bits = (1 << 8) - 1, | 
| 147 |  | 
| 148 |                 /** | 
| 149 |                  * Binary: 111111 | 
| 150 |                  */ | 
| 151 |                 Lower6Bits = (1 << 6) - 1, | 
| 152 |  | 
| 153 |                 /* | 
| 154 |                  * Binary: 11000000 | 
| 155 |                  */ | 
| 156 |                 UpperTwoBits = 3 << 6 | 
| 157 |             }; | 
| 158 |             /* Private helpers below are only declared here; their definitions are not in this header. */ | 
| 159 |             static inline CharIdentifier toIdentifier(const QChar ch); | 
| 160 |  | 
| 161 |             static inline quint8 toCompressedChar(const QChar ch, const int len); | 
| 162 |             static inline QChar toChar(const CharIdentifier id); | 
| 163 |  | 
| 164 |             /** | 
| 165 |              * @short Returns @c true if @p number is an even number, otherwise | 
| 166 |              * @c false. | 
| 167 |              */ | 
| 168 |             static inline bool isEven(const int number); | 
| 169 |  | 
| 170 |             /** | 
| 171 |              * @short This class can only be used via its static members. | 
| 172 |              */ | 
| 173 |             inline CompressedWhitespace(); | 
| 174 |             Q_DISABLE_COPY(CompressedWhitespace) | 
| 175 |     }; | 
| 176 | } | 
| 177 |  | 
| 178 | QT_END_NAMESPACE | 
| 179 |  | 
| 180 | #endif | 
| 181 |  |