| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | // |
| 41 | // W A R N I N G |
| 42 | // ------------- |
| 43 | // |
| 44 | // This file is not part of the Qt API. It exists purely as an |
| 45 | // implementation detail. This header file may change from version to |
| 46 | // version without notice, or even be removed. |
| 47 | // |
| 48 | // We mean it. |
| 49 | |
| 50 | #ifndef Patternist_CompressedWhitespace_H |
| 51 | #define Patternist_CompressedWhitespace_H |
| 52 | |
| 53 | #include <QtGlobal> |
| 54 | |
| 55 | QT_BEGIN_NAMESPACE |
| 56 | |
| 57 | class QChar; |
| 58 | class QString; |
| 59 | class QStringRef; |
| 60 | |
| 61 | namespace QPatternist |
| 62 | { |
| 63 | /** |
| 64 | * @short A compression facility for whitespace nodes. |
| 65 | * |
| 66 | * CompressedWhitespace compresses and decompresses strings that consist of |
| 67 | * whitespace only, and does so with a scheme that is designed to do this |
| 68 | * specialized task in an efficient way. The approach is simple: each |
| 69 | * sequence of equal whitespace in the input gets coded into one byte, |
| 70 | * where the upper two bits signal the type, CharIdentifier, and the |
| 71 | * remaining six bits are the count. |
| 72 | * |
| 73 | * For instance, this scheme manages to compress a sequence of spaces |
| 74 | * followed by a new line into 16 bits (one QChar), and QString stores |
| 75 | * strings of one QChar quite efficiently, by avoiding a heap allocation. |
| 76 | * |
| 77 | * There is no way to tell whether a QString is compressed or not. |
| 78 | * |
| 79 | * The compression scheme originates from Saxon, by Michael Kay. |
| 80 | * |
| 81 | * @author Frans Englich <frans.englich@nokia.com> |
| 82 | */ |
| 83 | class Q_AUTOTEST_EXPORT CompressedWhitespace |
| 84 | { |
| 85 | public: |
| 86 | /** |
| 87 | * @short Compresses @p input into a compressed format, returned |
| 88 | * as a QString. |
| 89 | * |
| 90 | * The caller guarantees that input is not empty |
| 91 | * and consists only of whitespace. |
| 92 | * |
| 93 | * The returned format is opaque. There is no way to find out |
| 94 | * whether a QString contains compressed data or not. |
| 95 | * |
| 96 | * @see decompress() |
| 97 | */ |
| 98 | static QString compress(const QStringRef &input); |
| 99 | |
| 100 | /** |
| 101 | * @short Decompresses @p input into a usual QString. |
| 102 | * |
| 103 | * @p input must be a QString as returned by compress(). |
| 104 | * |
| 105 | * @see compress() |
| 106 | */ |
| 107 | static QString decompress(const QString &input); |
| 108 | |
| 109 | private: |
| 110 | /** |
| 111 | * We use the two upper bits for communicating what space it is. |
| 112 | */ |
| 113 | enum CharIdentifier |
| 114 | { |
| 115 | Space = 0x0, |
| 116 | |
| 117 | /** |
| 118 | * 0xD, \\r |
| 119 | * |
| 120 | * Binary: 10000000 |
| 121 | */ |
| 122 | CR = 0x80, |
| 123 | |
| 124 | /** |
| 125 | * 0xA, \\n |
| 126 | * |
| 127 | * Binary: 01000000 |
| 128 | */ |
| 129 | LF = 0x40, |
| 130 | |
| 131 | /** |
| 132 | * Binary: 11000000 |
| 133 | */ |
| 134 | Tab = 0xC0 |
| 135 | }; |
| 136 | |
| 137 | enum Constants |
| 138 | { |
| 139 | /* We can at maximum store this many consecutive characters |
| 140 | * of one type. We use 6 bits for the count. */ |
| 141 | MaxCharCount = (1 << 6) - 1, |
| 142 | |
| 143 | /** |
| 144 | * Binary: 11111111 |
| 145 | */ |
| 146 | Lower8Bits = (1 << 8) - 1, |
| 147 | |
| 148 | /** |
| 149 | * Binary: 111111 |
| 150 | */ |
| 151 | Lower6Bits = (1 << 6) - 1, |
| 152 | |
| 153 | /* |
| 154 | * Binary: 11000000 |
| 155 | */ |
| 156 | UpperTwoBits = 3 << 6 |
| 157 | }; |
| 158 | |
| 159 | static inline CharIdentifier toIdentifier(const QChar ch); |
| 160 | |
| 161 | static inline quint8 toCompressedChar(const QChar ch, const int len); |
| 162 | static inline QChar toChar(const CharIdentifier id); |
| 163 | |
| 164 | /** |
| 165 | * @short Returns @c true if @p number is an even number, otherwise |
| 166 | * @c false. |
| 167 | */ |
| 168 | static inline bool isEven(const int number); |
| 169 | |
| 170 | /** |
| 171 | * @short This class can only be used via its static members. |
| 172 | */ |
| 173 | inline CompressedWhitespace(); |
| 174 | Q_DISABLE_COPY(CompressedWhitespace) |
| 175 | }; |
| 176 | } |
| 177 | |
| 178 | QT_END_NAMESPACE |
| 179 | |
| 180 | #endif |
| 181 | |