| 1 | /**************************************************************************** | 
| 2 | ** | 
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. | 
| 4 | ** Contact: https://www.qt.io/licensing/ | 
| 5 | ** | 
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. | 
| 7 | ** | 
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ | 
| 9 | ** Commercial License Usage | 
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in | 
| 11 | ** accordance with the commercial license agreement provided with the | 
| 12 | ** Software or, alternatively, in accordance with the terms contained in | 
| 13 | ** a written agreement between you and The Qt Company. For licensing terms | 
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further | 
| 15 | ** information use the contact form at https://www.qt.io/contact-us. | 
| 16 | ** | 
| 17 | ** GNU Lesser General Public License Usage | 
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser | 
| 19 | ** General Public License version 3 as published by the Free Software | 
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | 
| 21 | ** packaging of this file. Please review the following information to | 
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements | 
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | 
| 24 | ** | 
| 25 | ** GNU General Public License Usage | 
| 26 | ** Alternatively, this file may be used under the terms of the GNU | 
| 27 | ** General Public License version 2.0 or (at your option) the GNU General | 
| 28 | ** Public license version 3 or any later version approved by the KDE Free | 
| 29 | ** Qt Foundation. The licenses are as published by the Free Software | 
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | 
| 31 | ** included in the packaging of this file. Please review the following | 
| 32 | ** information to ensure the GNU General Public License requirements will | 
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | 
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. | 
| 35 | ** | 
| 36 | ** $QT_END_LICENSE$ | 
| 37 | ** | 
| 38 | ****************************************************************************/ | 
| 39 |  | 
| 40 | #include <QString> | 
| 41 |  | 
| 42 | #include "qcompressedwhitespace_p.h" | 
| 43 |  | 
| 44 | QT_BEGIN_NAMESPACE | 
| 45 |  | 
| 46 | using namespace QPatternist; | 
| 47 |  | 
| 48 | CompressedWhitespace::CharIdentifier CompressedWhitespace::toIdentifier(const QChar ch) | 
| 49 | { | 
| 50 |     switch(ch.unicode()) | 
| 51 |     { | 
| 52 |         case ' ': | 
| 53 |             return Space; | 
| 54 |         case '\n': | 
| 55 |             return LF; | 
| 56 |         case '\r': | 
| 57 |             return CR; | 
| 58 |         case '\t': | 
| 59 |             return Tab; | 
| 60 |         default: | 
| 61 |         { | 
| 62 |             Q_ASSERT_X(false, Q_FUNC_INFO, | 
| 63 |                        "The caller must guarantee only whitespace is passed." ); | 
| 64 |             return Tab; | 
| 65 |         } | 
| 66 |     } | 
| 67 | } | 
| 68 |  | 
| 69 | bool CompressedWhitespace::isEven(const int number) | 
| 70 | { | 
| 71 |     Q_ASSERT(number >= 0); | 
| 72 |     return number % 2 == 0; | 
| 73 | } | 
| 74 |  | 
| 75 | quint8 CompressedWhitespace::toCompressedChar(const QChar ch, const int len) | 
| 76 | { | 
| 77 |     Q_ASSERT(len > 0); | 
| 78 |     Q_ASSERT(len <= MaxCharCount); | 
| 79 |  | 
| 80 |     return len + toIdentifier(ch); | 
| 81 | } | 
| 82 |  | 
| 83 | QChar CompressedWhitespace::toChar(const CharIdentifier id) | 
| 84 | { | 
| 85 |     switch(id) | 
| 86 |     { | 
| 87 |         case Space: return QLatin1Char(' '); | 
| 88 |         case CR:    return QLatin1Char('\r'); | 
| 89 |         case LF:    return QLatin1Char('\n'); | 
| 90 |         case Tab:   return QLatin1Char('\t'); | 
| 91 |         default: | 
| 92 |                     { | 
| 93 |                         Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input" ); | 
| 94 |                         return QChar(); | 
| 95 |                     } | 
| 96 |     } | 
| 97 | } | 
| 98 |  | 
| 99 | QString CompressedWhitespace::compress(const QStringRef &input) | 
| 100 | { | 
| 101 |     Q_ASSERT(!isEven(1) && isEven(0) && isEven(2)); | 
| 102 |     Q_ASSERT(!input.isEmpty()); | 
| 103 |  | 
| 104 |     QString result; | 
| 105 |     const int len = input.length(); | 
| 106 |  | 
| 107 |     /* The amount of compressed characters. For instance, if input is | 
| 108 |      * four spaces followed by one tab, compressedChars will be 2, and the resulting | 
| 109 |      * QString will have a length of 1, two compressedChars stored in one QChar. */ | 
| 110 |     int compressedChars = 0; | 
| 111 |  | 
| 112 |     for(int i = 0; i < len; ++i) | 
| 113 |     { | 
| 114 |         const QChar c(input.at(i)); | 
| 115 |  | 
| 116 |         int start = i; | 
| 117 |  | 
| 118 |         while(true) | 
| 119 |         { | 
| 120 |             if(i + 1 == input.length() || input.at(i: i + 1) != c) | 
| 121 |                 break; | 
| 122 |             else | 
| 123 |                 ++i; | 
| 124 |         } | 
| 125 |  | 
| 126 |         /* The length of subsequent whitespace characters in the input. */ | 
| 127 |         int wsLen = (i - start) + 1; | 
| 128 |  | 
| 129 |         /* We might get a sequence of whitespace that is so long, that we can't | 
| 130 |          * store it in one unit/byte. In that case we chop it into as many subsequent | 
| 131 |          * ones that is needed. */ | 
| 132 |         while(true) | 
| 133 |         { | 
| 134 |             const int unitLength = qMin(a: wsLen, b: int(MaxCharCount)); | 
| 135 |             wsLen -= unitLength; | 
| 136 |  | 
| 137 |             ushort resultCP = toCompressedChar(ch: c, len: unitLength); | 
| 138 |  | 
| 139 |             if(isEven(number: compressedChars)) | 
| 140 |                 result += QChar(resultCP); | 
| 141 |             else | 
| 142 |             { | 
| 143 |                 resultCP = resultCP << 8; | 
| 144 |                 resultCP |= result.at(i: result.size() - 1).unicode(); | 
| 145 |                 result[result.size() - 1] = resultCP; | 
| 146 |             } | 
| 147 |  | 
| 148 |             ++compressedChars; | 
| 149 |  | 
| 150 |             if(wsLen == 0) | 
| 151 |                 break; | 
| 152 |         } | 
| 153 |     } | 
| 154 |  | 
| 155 |     return result; | 
| 156 | } | 
| 157 |  | 
| 158 | QString CompressedWhitespace::decompress(const QString &input) | 
| 159 | { | 
| 160 |     Q_ASSERT(!input.isEmpty()); | 
| 161 |     const int len = input.length() * 2; | 
| 162 |     QString retval; | 
| 163 |  | 
| 164 |     for(int i = 0; i < len; ++i) | 
| 165 |     { | 
| 166 |         ushort cp = input.at(i: i / 2).unicode(); | 
| 167 |  | 
| 168 |         if(isEven(number: i)) | 
| 169 |             cp &= Lower8Bits; | 
| 170 |         else | 
| 171 |         { | 
| 172 |             cp = cp >> 8; | 
| 173 |  | 
| 174 |             if(cp == 0) | 
| 175 |                 return retval; | 
| 176 |         } | 
| 177 |  | 
| 178 |         const quint8 wsLen = cp & Lower6Bits; | 
| 179 |         const quint8 id = cp & UpperTwoBits; | 
| 180 |  | 
| 181 |         /* Resize retval, and fill in on the top. */ | 
| 182 |         const int oldSize = retval.size(); | 
| 183 |         const int newSize = retval.size() + wsLen; | 
| 184 |         retval.resize(size: newSize); | 
| 185 |         const QChar ch(toChar(id: CharIdentifier(id))); | 
| 186 |  | 
| 187 |         for(int f = oldSize; f < newSize; ++f) | 
| 188 |             retval[f] = ch; | 
| 189 |     } | 
| 190 |  | 
| 191 |     return retval; | 
| 192 | } | 
| 193 |  | 
| 194 | QT_END_NAMESPACE | 
| 195 |  | 
| 196 |  |