| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | // |
| 41 | // W A R N I N G |
| 42 | // ------------- |
| 43 | // |
| 44 | // This file is not part of the Qt API. It exists purely as an |
| 45 | // implementation detail. This header file may change from version to |
| 46 | // version without notice, or even be removed. |
| 47 | // |
| 48 | // We mean it. |
| 49 | |
| 50 | #ifndef Patternist_DerivedString_H |
| 51 | #define Patternist_DerivedString_H |
| 52 | |
| 53 | #include <QRegExp> |
| 54 | |
| 55 | #include <private/qxmlutils_p.h> |
| 56 | #include <private/qbuiltintypes_p.h> |
| 57 | #include <private/qpatternistlocale_p.h> |
| 58 | #include <private/qvalidationerror_p.h> |
| 59 | |
| 60 | QT_BEGIN_NAMESPACE |
| 61 | |
| 62 | namespace QPatternist |
| 63 | { |
| 64 | /** |
| 65 | * @short Represents instances of derived @c xs:string types, such as @c |
| 66 | * xs:normalizedString. |
| 67 | * |
| 68 | * Whitespace is a significant part for creating values from the lexical |
| 69 | * space. Of course the specification is tricky here. Here's some pointers: |
| 70 | * |
| 71 | * - From <a href="4.3.6.1 The whiteSpace Schema Component">XML Schema Part 2: Datatypes |
| 72 | * Second Edition, 4.3.6 whiteSpace</a>: |
| 73 | * "For all atomic datatypes other than string (and types |
| 74 | * derived by restriction from it) the value of whiteSpace is |
| 75 | * collapse and cannot be changed by a schema author; for string the |
| 76 | * value of whiteSpace is preserve; for any type derived by |
| 77 | * restriction from string the value of whiteSpace can be any of the |
| 78 | * three legal values." |
| 79 | * - From <a href="http://www.w3.org/TR/xmlschema-1/#d0e1654">XML Schema Part 1: Structures |
| 80 | * Second Edition, 3.1.4 White Space Normalization during Validation</a>: |
| 81 | * "[Definition:] The normalized value of an element or attribute |
| 82 | * information item is an initial value whose white space, if any, |
| 83 | * has been normalized according to the value of the whiteSpace facet of |
| 84 | * the simple type definition used in its validation." |
| 85 | * |
| 86 | * @author Frans Englich <frans.englich@nokia.com> |
| 87 | * @ingroup Patternist_xdm |
| 88 | * @todo Documentation is missing |
| 89 | */ |
| 90 | template<TypeOfDerivedString DerivedType> |
| 91 | class DerivedString : public AtomicValue |
| 92 | { |
| 93 | private: |
| 94 | static inline ItemType::Ptr itemType() |
| 95 | { |
| 96 | switch(DerivedType) |
| 97 | { |
| 98 | case TypeNormalizedString: return BuiltinTypes::xsNormalizedString; |
| 99 | case TypeToken: return BuiltinTypes::xsToken; |
| 100 | case TypeLanguage: return BuiltinTypes::xsLanguage; |
| 101 | case TypeNMTOKEN: return BuiltinTypes::xsNMTOKEN; |
| 102 | case TypeName: return BuiltinTypes::xsName; |
| 103 | case TypeNCName: return BuiltinTypes::xsNCName; |
| 104 | case TypeID: return BuiltinTypes::xsID; |
| 105 | case TypeIDREF: return BuiltinTypes::xsIDREF; |
| 106 | case TypeENTITY: return BuiltinTypes::xsENTITY; |
| 107 | case TypeString: return BuiltinTypes::xsString; |
| 108 | } |
| 109 | |
| 110 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached." ); |
| 111 | return ItemType::Ptr(); |
| 112 | } |
| 113 | |
| 114 | const QString m_value; |
| 115 | |
| 116 | inline DerivedString(const QString &value) : m_value(value) |
| 117 | { |
| 118 | } |
| 119 | |
| 120 | /** |
| 121 | * @short This is an incomplete test for whether @p ch conforms to |
| 122 | * the XML 1.0 NameChar production. |
| 123 | */ |
| 124 | static inline bool isNameChar(const QChar &ch) |
| 125 | { |
| 126 | return ch.isLetter() || |
| 127 | ch.isDigit() || |
| 128 | ch == QLatin1Char('.') || |
| 129 | ch == QLatin1Char('-') || |
| 130 | ch == QLatin1Char('_') || |
| 131 | ch == QLatin1Char(':'); |
| 132 | } |
| 133 | |
| 134 | /** |
| 135 | * @returns @c true if @p input is a valid @c xs:Name. |
| 136 | * @see <a href="http://www.w3.org/TR/REC-xml/#NT-Name">Extensible |
| 137 | * Markup Language (XML) 1.0 (Fourth Edition), [5] Name</a> |
| 138 | */ |
| 139 | static inline bool isValidName(const QString &input) |
| 140 | { |
| 141 | if(input.isEmpty()) |
| 142 | return false; |
| 143 | |
| 144 | const QChar first(input.at(i: 0)); |
| 145 | |
| 146 | if(first.isLetter() || |
| 147 | first == QLatin1Char('_') || |
| 148 | first == QLatin1Char(':')) |
| 149 | { |
| 150 | const int len = input.length(); |
| 151 | |
| 152 | if(len == 1) |
| 153 | return true; |
| 154 | |
| 155 | /* Since we've checked the first character above, we start at |
| 156 | * position 1. */ |
| 157 | for(int i = 1; i < len; ++i) |
| 158 | { |
| 159 | if(!isNameChar(ch: input.at(i))) |
| 160 | return false; |
| 161 | } |
| 162 | |
| 163 | return true; |
| 164 | } |
| 165 | else |
| 166 | return false; |
| 167 | } |
| 168 | |
| 169 | /** |
| 170 | * @returns @c true if @p input conforms to the XML 1.0 @c Nmtoken product. |
| 171 | * |
| 172 | * @see <a |
| 173 | * href="http://www.w3.org/TR/2000/WD-xml-2e-20000814#NT-Nmtoken">Extensible |
| 174 | * Markup Language (XML) 1.0 (Second Edition), [7] Nmtoken</a> |
| 175 | */ |
| 176 | static inline bool isValidNMTOKEN(const QString &input) |
| 177 | { |
| 178 | const int len = input.length(); |
| 179 | |
| 180 | if(len == 0) |
| 181 | return false; |
| 182 | |
| 183 | for(int i = 0; i < len; ++i) |
| 184 | { |
| 185 | if(!isNameChar(ch: input.at(i))) |
| 186 | return false; |
| 187 | } |
| 188 | |
| 189 | return true; |
| 190 | } |
| 191 | |
| 192 | /** |
| 193 | * @short Performs attribute value normalization as if @p input was not |
| 194 | * from a @c CDATA section. |
| 195 | * |
| 196 | * Each whitespace character in @p input that's not a space, such as tab |
| 197 | * or new line character, is replaced with a space. This algorithm |
| 198 | * differs from QString::simplified() in that it doesn't collapse |
| 199 | * subsequent whitespace characters to a single one, or remove trailing |
| 200 | * and leading space. |
| 201 | * |
| 202 | * @see <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">Extensible |
| 203 | * Markup Language (XML) 1.0 (Second Edition), 3.3.3 [E70]Attribute-Value Normalization</a> |
| 204 | */ |
| 205 | static QString attributeNormalize(const QString &input) |
| 206 | { |
| 207 | QString retval(input); |
| 208 | const int len = retval.length(); |
| 209 | const QLatin1Char space(' '); |
| 210 | |
| 211 | for(int i = 0; i < len; ++i) |
| 212 | { |
| 213 | const QChar ati(retval.at(i)); |
| 214 | |
| 215 | if(ati.isSpace() && ati != space) |
| 216 | retval[i] = space; |
| 217 | } |
| 218 | |
| 219 | return retval; |
| 220 | } |
| 221 | |
| 222 | static AtomicValue::Ptr error(const NamePool::Ptr &np, const QString &invalidValue) |
| 223 | { |
| 224 | return ValidationError::createError(description: QString::fromLatin1(str: "%1 is not a valid value for " |
| 225 | "type %2." ).arg(a: formatData(data: invalidValue)) |
| 226 | .arg(a: formatType(np, type: itemType()))); |
| 227 | } |
| 228 | |
| 229 | public: |
| 230 | |
| 231 | /** |
| 232 | * @note This function doesn't perform any cleanup/normalizaiton of @p |
| 233 | * value. @p value must be a canonical value space of the type. |
| 234 | * |
| 235 | * If you want cleanup to be performed and/or the lexical space |
| 236 | * checked, use fromLexical(). |
| 237 | */ |
| 238 | static AtomicValue::Ptr fromValue(const QString &value) |
| 239 | { |
| 240 | return AtomicValue::Ptr(new DerivedString(value)); |
| 241 | } |
| 242 | |
| 243 | /** |
| 244 | * Constructs an instance from the lexical |
| 245 | * representation @p lexical. |
| 246 | */ |
| 247 | static AtomicValue::Ptr fromLexical(const NamePool::Ptr &np, const QString &lexical) |
| 248 | { |
| 249 | switch(DerivedType) |
| 250 | { |
| 251 | case TypeString: |
| 252 | return AtomicValue::Ptr(new DerivedString(lexical)); |
| 253 | case TypeNormalizedString: |
| 254 | return AtomicValue::Ptr(new DerivedString(attributeNormalize(input: lexical))); |
| 255 | case TypeToken: |
| 256 | return AtomicValue::Ptr(new DerivedString(lexical.simplified())); |
| 257 | case TypeLanguage: |
| 258 | { |
| 259 | const QString simplified(lexical.trimmed()); |
| 260 | |
| 261 | QRegExp validate(QLatin1String("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*" )); |
| 262 | Q_ASSERT(validate.isValid()); |
| 263 | |
| 264 | if(validate.exactMatch(str: simplified)) |
| 265 | return AtomicValue::Ptr(new DerivedString(lexical.simplified())); |
| 266 | else |
| 267 | return error(np, invalidValue: simplified); |
| 268 | } |
| 269 | case TypeNMTOKEN: |
| 270 | { |
| 271 | const QString trimmed(lexical.trimmed()); |
| 272 | |
| 273 | if(isValidNMTOKEN(input: trimmed)) |
| 274 | return AtomicValue::Ptr(new DerivedString(trimmed)); |
| 275 | else |
| 276 | return error(np, invalidValue: trimmed); |
| 277 | } |
| 278 | case TypeName: |
| 279 | { |
| 280 | const QString simplified(lexical.simplified()); |
| 281 | |
| 282 | if(isValidName(input: simplified)) |
| 283 | return AtomicValue::Ptr(new DerivedString(simplified)); |
| 284 | else |
| 285 | return error(np, invalidValue: simplified); |
| 286 | } |
| 287 | case TypeID: |
| 288 | case TypeIDREF: |
| 289 | case TypeENTITY: |
| 290 | case TypeNCName: |
| 291 | { |
| 292 | /* We treat xs:ID, xs:ENTITY, xs:IDREF and xs:NCName in the exact same |
| 293 | * way, except for the type annotation. |
| 294 | * |
| 295 | * We use trimmed() instead of simplified() because it's |
| 296 | * faster and whitespace isn't allowed between |
| 297 | * non-whitespace characters anyway, for these types. */ |
| 298 | const QString trimmed(lexical.trimmed()); |
| 299 | |
| 300 | if(QXmlUtils::isNCName(ncName: trimmed)) |
| 301 | return AtomicValue::Ptr(new DerivedString(trimmed)); |
| 302 | else |
| 303 | return error(np, invalidValue: trimmed); |
| 304 | } |
| 305 | default: |
| 306 | { |
| 307 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached." ); |
| 308 | return AtomicValue::Ptr(); |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | virtual QString stringValue() const |
| 314 | { |
| 315 | return m_value; |
| 316 | } |
| 317 | |
| 318 | virtual bool evaluateEBV(const QExplicitlySharedDataPointer<DynamicContext> &) const |
| 319 | { |
| 320 | return m_value.length() > 0; |
| 321 | } |
| 322 | |
| 323 | virtual ItemType::Ptr type() const |
| 324 | { |
| 325 | return itemType(); |
| 326 | } |
| 327 | }; |
| 328 | } |
| 329 | |
| 330 | QT_END_NAMESPACE |
| 331 | |
| 332 | #endif |
| 333 | |