| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2015 The Qt Company Ltd. |
| 4 | ** Contact: http://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtVersit module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL21$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see http://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at http://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 2.1 or version 3 as published by the Free |
| 20 | ** Software Foundation and appearing in the file LICENSE.LGPLv21 and |
| 21 | ** LICENSE.LGPLv3 included in the packaging of this file. Please review the |
| 22 | ** following information to ensure the GNU Lesser General Public License |
| 23 | ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and |
| 24 | ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
| 25 | ** |
| 26 | ** As a special exception, The Qt Company gives you certain additional |
| 27 | ** rights. These rights are described in The Qt Company LGPL Exception |
| 28 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
| 29 | ** |
| 30 | ** $QT_END_LICENSE$ |
| 31 | ** |
| 32 | ****************************************************************************/ |
| 33 | |
| 34 | #include "qversitutils_p.h" |
| 35 | |
| 36 | #include <QtCore/QTextCodec> |
| 37 | #include <QtCore/qstring.h> |
| 38 | #include <QtCore/qtextcodec.h> |
| 39 | #include <QtCore/qvariant.h> |
| 40 | |
| 41 | #include <QtCore/QJsonArray> |
| 42 | #include <QtCore/QJsonDocument> |
| 43 | #include <QtCore/QJsonValue> |
| 44 | |
| 45 | #include "qversitdocument.h" |
| 46 | |
| 47 | QT_BEGIN_NAMESPACE_VERSIT |
| 48 | |
| 49 | QTextCodec* VersitUtils::m_previousCodec = 0; |
| 50 | QList<QByteArrayMatcher>* VersitUtils::m_newlineList = 0; |
| 51 | QByteArray VersitUtils::m_encodingMap[256]; |
| 52 | QBasicMutex VersitUtils::m_staticLock; |
| 53 | |
| 54 | /*! |
| 55 | * Encode \a ch with \a codec, without adding an byte-order mark |
| 56 | */ |
| 57 | QByteArray VersitUtils::encode(char ch, QTextCodec* codec) |
| 58 | { |
| 59 | changeCodec(codec); |
| 60 | return m_encodingMap[(int)ch]; |
| 61 | } |
| 62 | |
| 63 | /*! |
| 64 | * Encode \a ba with \a codec, without adding an byte-order mark. \a ba is interpreted as ASCII |
| 65 | */ |
| 66 | QByteArray VersitUtils::encode(const QByteArray& ba, QTextCodec* codec) |
| 67 | { |
| 68 | QTextCodec::ConverterState state(QTextCodec::IgnoreHeader); |
| 69 | return codec->fromUnicode(in: QString::fromLatin1(str: ba.data()).data(), length: ba.length(), state: &state); |
| 70 | } |
| 71 | |
| 72 | /*! |
| 73 | * Returns the list of DOS, UNIX and Mac newline characters for \a codec. |
| 74 | */ |
| 75 | QList<QByteArrayMatcher>* VersitUtils::newlineList(QTextCodec* codec) |
| 76 | { |
| 77 | changeCodec(codec); |
| 78 | return m_newlineList; |
| 79 | } |
| 80 | |
| 81 | /*! |
| 82 | * Update the cached tables of pregenerated encoded text with \a codec. |
| 83 | */ |
| 84 | void VersitUtils::changeCodec(QTextCodec* codec) { |
| 85 | QMutexLocker readWriterLocker(&VersitUtils::m_staticLock); |
| 86 | |
| 87 | if (VersitUtils::m_newlineList != 0 && codec == VersitUtils::m_previousCodec) |
| 88 | return; |
| 89 | |
| 90 | // Build m_encodingMap |
| 91 | QChar qch; |
| 92 | QTextCodec::ConverterState state(QTextCodec::IgnoreHeader); |
| 93 | for (int c = 0; c < 256; c++) { |
| 94 | qch = QLatin1Char(c); |
| 95 | m_encodingMap[c] = codec->fromUnicode(in: &qch, length: 1, state: &state); |
| 96 | } |
| 97 | |
| 98 | // Build m_newlineList |
| 99 | if (m_newlineList != 0) |
| 100 | delete m_newlineList; |
| 101 | m_newlineList = new QList<QByteArrayMatcher>; |
| 102 | m_newlineList->append(t: QByteArrayMatcher(encode(ba: "\r\n" , codec))); |
| 103 | m_newlineList->append(t: QByteArrayMatcher(encode(ba: "\n" , codec))); |
| 104 | m_newlineList->append(t: QByteArrayMatcher(encode(ba: "\r" , codec))); |
| 105 | |
| 106 | m_previousCodec = codec; |
| 107 | } |
| 108 | |
| 109 | /*! |
| 110 | * Finds a property in the \a document with the given \a propertyName, adds it to \a toBeRemoved, |
| 111 | * and returns it. |
| 112 | */ |
| 113 | QVersitProperty VersitUtils::takeProperty(const QVersitDocument& document, |
| 114 | const QString& propertyName, |
| 115 | QList<QVersitProperty>* toBeRemoved) { |
| 116 | foreach (const QVersitProperty& currentProperty, document.properties()) { |
| 117 | if (currentProperty.name() == propertyName) { |
| 118 | *toBeRemoved << currentProperty; |
| 119 | return currentProperty; |
| 120 | } |
| 121 | } |
| 122 | return QVersitProperty(); |
| 123 | } |
| 124 | |
| 125 | /*! |
| 126 | * Returns true iff \a bytes is a valid UTF-8 sequence. |
| 127 | */ |
| 128 | bool VersitUtils::isValidUtf8(const QByteArray& bytes) { |
| 129 | int sequenceLength = 1; // number of bytes in total for a sequence |
| 130 | int continuation = 0; // number of bytes left in a continuation |
| 131 | quint32 codePoint = 0; |
| 132 | for (int i = 0; i < bytes.size(); i++) { |
| 133 | quint8 byte = bytes[i]; |
| 134 | if (continuation == 0) { |
| 135 | if (byte & 0x80) { // 1xxxxxxx |
| 136 | if (byte & 0x40) { // 11xxxxxx |
| 137 | if (byte == 0xc0 || byte == 0xc1) // 1100000x |
| 138 | return false; // overlong 2 byte sequence |
| 139 | if (byte & 0x20) { // 111xxxxx |
| 140 | if (byte & 0x10) { // 1111xxxx |
| 141 | if (byte & 0x08) { // 11111xxx |
| 142 | // Outside unicode range |
| 143 | return false; |
| 144 | } else { // 11110xxx |
| 145 | sequenceLength = 4; |
| 146 | continuation = 3; // three more bytes |
| 147 | codePoint = byte & 0x07; // take the last 3 bits |
| 148 | } |
| 149 | } else { // 1110xxxx |
| 150 | sequenceLength = 3; |
| 151 | continuation = 2; // two more bytes |
| 152 | codePoint = byte & 0x0f; // take last 4 bits |
| 153 | } |
| 154 | } else { // 110xxxxx |
| 155 | sequenceLength = 2; |
| 156 | continuation = 1; // one more byte |
| 157 | codePoint = byte & 0x1f; // take last 5 bits |
| 158 | } |
| 159 | } else { // 10xxxxxx |
| 160 | // unexpected continuation |
| 161 | return false; |
| 162 | } |
| 163 | } else { // 0xxxxxxx |
| 164 | sequenceLength = 1; |
| 165 | } |
| 166 | } else { // continuation > 0 |
| 167 | if ((byte & 0xc0) != 0x80) // 10xxxxxx |
| 168 | return false; // expected continuation not found |
| 169 | codePoint = (codePoint << 6) | (byte & 0x3f); // append last 6 bits |
| 170 | continuation--; |
| 171 | } |
| 172 | |
| 173 | if (continuation == 0) { |
| 174 | // Finished decoding a character - it's not overlong and that it's in range |
| 175 | switch (sequenceLength) { |
| 176 | // 1-byte sequence can't be overlong |
| 177 | // 2-byte sequence has already been checked for overlongness |
| 178 | case 3: |
| 179 | if (codePoint < 0x800) // overlong |
| 180 | return false; |
| 181 | |
| 182 | // Filter out codepoints outside the Unicode range |
| 183 | if ((codePoint >= 0xd800 && codePoint <= 0xdfff) // utf-16 surrogate halves |
| 184 | || (codePoint >= 0xfffe && codePoint <= 0xffff)) { // reversed utf-16 BOM |
| 185 | return false; |
| 186 | } |
| 187 | break; |
| 188 | case 4: |
| 189 | if (codePoint < 0x10000 // overlong |
| 190 | || codePoint > 0x10ffff) // above Unicode range |
| 191 | return false; |
| 192 | break; |
| 193 | } |
| 194 | codePoint = 0; |
| 195 | } |
| 196 | } |
| 197 | return continuation == 0; |
| 198 | } |
| 199 | |
| 200 | /*! |
| 201 | * Convert variant \a data to string \a json in JSON format. |
| 202 | * |
| 203 | * The data is encoded as an array containing one item |
| 204 | * to allow the same encoding to be used for both |
| 205 | * primitive and compound data types. |
| 206 | * |
| 207 | * Returns true if the conversion is successful, false otherwise. |
| 208 | * |
| 209 | * \sa convertFromJson() |
| 210 | */ |
| 211 | bool VersitUtils::convertToJson(const QVariant &data, QString *json) |
| 212 | { |
| 213 | const QJsonValue dataAsJsonValue = QJsonValue::fromVariant(variant: data); |
| 214 | if (data.isValid() && dataAsJsonValue.isNull()) |
| 215 | return false; |
| 216 | QJsonArray jsonArray; |
| 217 | jsonArray.append(value: dataAsJsonValue); |
| 218 | const QJsonDocument jsonDocument(jsonArray); |
| 219 | *json = QString::fromUtf8(str: jsonDocument.toJson()); |
| 220 | return true; |
| 221 | } |
| 222 | |
| 223 | /*! |
| 224 | * Convert string \a json in JSON format to variant \a data. |
| 225 | * |
| 226 | * The format of the json string is assumed to be a one-item array. |
| 227 | * |
| 228 | * Returns true if the conversion is successful, false otherwise. |
| 229 | * |
| 230 | * \sa convertToJson() |
| 231 | */ |
| 232 | bool VersitUtils::convertFromJson(const QString &json, QVariant *data) |
| 233 | { |
| 234 | const QJsonDocument jsonDoc = QJsonDocument::fromJson(json: json.toUtf8()); |
| 235 | const QJsonValue jsonValue = jsonDoc.array().at(i: 0); |
| 236 | if (jsonValue.isUndefined()) |
| 237 | return false; |
| 238 | *data = jsonValue.toVariant(); |
| 239 | return true; |
| 240 | } |
| 241 | |
| 242 | QT_END_NAMESPACE_VERSIT |
| 243 | |