1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | // |
41 | // W A R N I N G |
42 | // ------------- |
43 | // |
44 | // This file is not part of the Qt API. It exists purely as an |
45 | // implementation detail. This header file may change from version to |
46 | // version without notice, or even be removed. |
47 | // |
48 | // We mean it. |
49 | |
50 | #ifndef Patternist_CompressedWhitespace_H |
51 | #define Patternist_CompressedWhitespace_H |
52 | |
53 | #include <QtGlobal> |
54 | |
55 | QT_BEGIN_NAMESPACE |
56 | |
57 | class QChar; |
58 | class QString; |
59 | class QStringRef; |
60 | |
61 | namespace QPatternist |
62 | { |
63 | /** |
64 | * @short A compression facility for whitespace nodes. |
65 | * |
66 | * CompressedWhitespace compresses and decompresses strings that consist of |
67 | * whitespace only, and does so with a scheme that is designed to do this |
68 | * specialized task in an efficient way. The approach is simple: each |
69 | * run of identical whitespace characters in the input is coded into one byte, |
70 | * where the upper two bits signal the type, CharIdentifier, and the |
71 | * remaining six bits are the count. |
72 | * |
73 | * For instance, this scheme manages to compress a sequence of spaces |
74 | * followed by a new line into 16 bits (one QChar), and QString stores |
75 | * strings of one QChar quite efficiently, by avoiding a heap allocation. |
76 | * |
77 | * There is no way to tell whether a QString is compressed or not. |
78 | * |
79 | * The compression scheme originates from Saxon, by Michael Kay. |
80 | * |
81 | * @author Frans Englich <frans.englich@nokia.com> |
82 | */ |
83 | class Q_AUTOTEST_EXPORT CompressedWhitespace |
84 | { |
85 | public: |
86 | /** |
87 | * @short Compresses @p input into a compressed format, returned |
88 | * as a QString. |
89 | * |
90 | * The caller guarantees that @p input is not empty |
91 | * and consists only of whitespace. |
92 | * |
93 | * The returned format is opaque. There is no way to find out |
94 | * whether a QString contains compressed data or not. |
95 | * |
96 | * @see decompress() |
97 | */ |
98 | static QString compress(const QStringRef &input); |
99 | |
100 | /** |
101 | * @short Decompresses @p input into a usual QString. |
102 | * |
103 | * @p input must be a QString as returned by compress(). |
104 | * |
105 | * @see compress() |
106 | */ |
107 | static QString decompress(const QString &input); |
108 | |
109 | private: |
110 | /** |
111 | * Identifies which whitespace character a run consists of. Only the two upper bits of a byte are used. |
112 | */ |
113 | enum CharIdentifier |
114 | { |
115 | Space = 0x0, /* Binary: 00000000 */ |
116 | |
117 | /** |
118 | * 0xD, \\r |
119 | * |
120 | * Binary: 10000000 |
121 | */ |
122 | CR = 0x80, |
123 | |
124 | /** |
125 | * 0xA, \\n |
126 | * |
127 | * Binary: 01000000 |
128 | */ |
129 | LF = 0x40, |
130 | |
131 | /** |
132 | * Binary: 11000000 |
133 | */ |
134 | Tab = 0xC0 |
135 | }; |
136 | |
137 | enum Constants |
138 | { |
139 | /* The maximum number of consecutive characters of one type that can be |
140 | * stored in one unit (63), because 6 bits are used for the count. */ |
141 | MaxCharCount = (1 << 6) - 1, |
142 | |
143 | /** |
144 | * Mask for the lower eight bits. Binary: 11111111 |
145 | */ |
146 | Lower8Bits = (1 << 8) - 1, |
147 | |
148 | /** |
149 | * Mask for the lower six bits. Binary: 111111 |
150 | */ |
151 | Lower6Bits = (1 << 6) - 1, |
152 | |
153 | /** |
154 | * Mask for the upper two bits of a byte. Binary: 11000000 |
155 | */ |
156 | UpperTwoBits = 3 << 6 |
157 | }; |
158 | |
159 | static inline CharIdentifier toIdentifier(const QChar ch); /* NOTE(review): presumably maps a whitespace character to its CharIdentifier; confirm against the definition. */ |
160 | |
161 | static inline quint8 toCompressedChar(const QChar ch, const int len); /* NOTE(review): presumably packs the identifier of ch and the count len into one byte; confirm against the definition. */ |
162 | static inline QChar toChar(const CharIdentifier id); /* NOTE(review): presumably the inverse of toIdentifier(); confirm against the definition. */ |
163 | |
164 | /** |
165 | * @short Returns @c true if @p number is an even number, otherwise |
166 | * @c false. |
167 | */ |
168 | static inline bool isEven(const int number); |
169 | |
170 | /** |
171 | * @short Declared private because this class can only be used via its static members. |
172 | */ |
173 | inline CompressedWhitespace(); |
174 | Q_DISABLE_COPY(CompressedWhitespace) |
175 | }; |
176 | } |
177 | |
178 | QT_END_NAMESPACE |
179 | |
180 | #endif |
181 | |