1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | // |
41 | // W A R N I N G |
42 | // ------------- |
43 | // |
44 | // This file is not part of the Qt API. It exists purely as an |
45 | // implementation detail. This header file may change from version to |
46 | // version without notice, or even be removed. |
47 | // |
48 | // We mean it. |
49 | |
50 | #ifndef Patternist_DerivedString_H |
51 | #define Patternist_DerivedString_H |
52 | |
53 | #include <QRegExp> |
54 | |
55 | #include <private/qxmlutils_p.h> |
56 | #include <private/qbuiltintypes_p.h> |
57 | #include <private/qpatternistlocale_p.h> |
58 | #include <private/qvalidationerror_p.h> |
59 | |
60 | QT_BEGIN_NAMESPACE |
61 | |
62 | namespace QPatternist |
63 | { |
64 | /** |
65 | * @short Represents instances of derived @c xs:string types, such as @c |
66 | * xs:normalizedString. |
67 | * |
68 | * Whitespace is a significant part for creating values from the lexical |
69 | * space. Of course the specification is tricky here. Here's some pointers: |
70 | * |
71 | * - From <a href="4.3.6.1 The whiteSpace Schema Component">XML Schema Part 2: Datatypes |
72 | * Second Edition, 4.3.6 whiteSpace</a>: |
73 | * "For all atomic datatypes other than string (and types |
74 | * derived by restriction from it) the value of whiteSpace is |
75 | * collapse and cannot be changed by a schema author; for string the |
76 | * value of whiteSpace is preserve; for any type derived by |
77 | * restriction from string the value of whiteSpace can be any of the |
78 | * three legal values." |
79 | * - From <a href="http://www.w3.org/TR/xmlschema-1/#d0e1654">XML Schema Part 1: Structures |
80 | * Second Edition, 3.1.4 White Space Normalization during Validation</a>: |
81 | * "[Definition:] The normalized value of an element or attribute |
82 | * information item is an initial value whose white space, if any, |
83 | * has been normalized according to the value of the whiteSpace facet of |
84 | * the simple type definition used in its validation." |
85 | * |
86 | * @author Frans Englich <frans.englich@nokia.com> |
87 | * @ingroup Patternist_xdm |
88 | * @todo Documentation is missing |
89 | */ |
90 | template<TypeOfDerivedString DerivedType> |
91 | class DerivedString : public AtomicValue |
92 | { |
93 | private: |
94 | static inline ItemType::Ptr itemType() |
95 | { |
96 | switch(DerivedType) |
97 | { |
98 | case TypeNormalizedString: return BuiltinTypes::xsNormalizedString; |
99 | case TypeToken: return BuiltinTypes::xsToken; |
100 | case TypeLanguage: return BuiltinTypes::xsLanguage; |
101 | case TypeNMTOKEN: return BuiltinTypes::xsNMTOKEN; |
102 | case TypeName: return BuiltinTypes::xsName; |
103 | case TypeNCName: return BuiltinTypes::xsNCName; |
104 | case TypeID: return BuiltinTypes::xsID; |
105 | case TypeIDREF: return BuiltinTypes::xsIDREF; |
106 | case TypeENTITY: return BuiltinTypes::xsENTITY; |
107 | case TypeString: return BuiltinTypes::xsString; |
108 | } |
109 | |
110 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached." ); |
111 | return ItemType::Ptr(); |
112 | } |
113 | |
114 | const QString m_value; |
115 | |
116 | inline DerivedString(const QString &value) : m_value(value) |
117 | { |
118 | } |
119 | |
120 | /** |
121 | * @short This is an incomplete test for whether @p ch conforms to |
122 | * the XML 1.0 NameChar production. |
123 | */ |
124 | static inline bool isNameChar(const QChar &ch) |
125 | { |
126 | return ch.isLetter() || |
127 | ch.isDigit() || |
128 | ch == QLatin1Char('.') || |
129 | ch == QLatin1Char('-') || |
130 | ch == QLatin1Char('_') || |
131 | ch == QLatin1Char(':'); |
132 | } |
133 | |
134 | /** |
135 | * @returns @c true if @p input is a valid @c xs:Name. |
136 | * @see <a href="http://www.w3.org/TR/REC-xml/#NT-Name">Extensible |
137 | * Markup Language (XML) 1.0 (Fourth Edition), [5] Name</a> |
138 | */ |
139 | static inline bool isValidName(const QString &input) |
140 | { |
141 | if(input.isEmpty()) |
142 | return false; |
143 | |
144 | const QChar first(input.at(i: 0)); |
145 | |
146 | if(first.isLetter() || |
147 | first == QLatin1Char('_') || |
148 | first == QLatin1Char(':')) |
149 | { |
150 | const int len = input.length(); |
151 | |
152 | if(len == 1) |
153 | return true; |
154 | |
155 | /* Since we've checked the first character above, we start at |
156 | * position 1. */ |
157 | for(int i = 1; i < len; ++i) |
158 | { |
159 | if(!isNameChar(ch: input.at(i))) |
160 | return false; |
161 | } |
162 | |
163 | return true; |
164 | } |
165 | else |
166 | return false; |
167 | } |
168 | |
169 | /** |
170 | * @returns @c true if @p input conforms to the XML 1.0 @c Nmtoken product. |
171 | * |
172 | * @see <a |
173 | * href="http://www.w3.org/TR/2000/WD-xml-2e-20000814#NT-Nmtoken">Extensible |
174 | * Markup Language (XML) 1.0 (Second Edition), [7] Nmtoken</a> |
175 | */ |
176 | static inline bool isValidNMTOKEN(const QString &input) |
177 | { |
178 | const int len = input.length(); |
179 | |
180 | if(len == 0) |
181 | return false; |
182 | |
183 | for(int i = 0; i < len; ++i) |
184 | { |
185 | if(!isNameChar(ch: input.at(i))) |
186 | return false; |
187 | } |
188 | |
189 | return true; |
190 | } |
191 | |
192 | /** |
193 | * @short Performs attribute value normalization as if @p input was not |
194 | * from a @c CDATA section. |
195 | * |
196 | * Each whitespace character in @p input that's not a space, such as tab |
197 | * or new line character, is replaced with a space. This algorithm |
198 | * differs from QString::simplified() in that it doesn't collapse |
199 | * subsequent whitespace characters to a single one, or remove trailing |
200 | * and leading space. |
201 | * |
202 | * @see <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">Extensible |
203 | * Markup Language (XML) 1.0 (Second Edition), 3.3.3 [E70]Attribute-Value Normalization</a> |
204 | */ |
205 | static QString attributeNormalize(const QString &input) |
206 | { |
207 | QString retval(input); |
208 | const int len = retval.length(); |
209 | const QLatin1Char space(' '); |
210 | |
211 | for(int i = 0; i < len; ++i) |
212 | { |
213 | const QChar ati(retval.at(i)); |
214 | |
215 | if(ati.isSpace() && ati != space) |
216 | retval[i] = space; |
217 | } |
218 | |
219 | return retval; |
220 | } |
221 | |
222 | static AtomicValue::Ptr error(const NamePool::Ptr &np, const QString &invalidValue) |
223 | { |
224 | return ValidationError::createError(description: QString::fromLatin1(str: "%1 is not a valid value for " |
225 | "type %2." ).arg(a: formatData(data: invalidValue)) |
226 | .arg(a: formatType(np, type: itemType()))); |
227 | } |
228 | |
229 | public: |
230 | |
231 | /** |
232 | * @note This function doesn't perform any cleanup/normalizaiton of @p |
233 | * value. @p value must be a canonical value space of the type. |
234 | * |
235 | * If you want cleanup to be performed and/or the lexical space |
236 | * checked, use fromLexical(). |
237 | */ |
238 | static AtomicValue::Ptr fromValue(const QString &value) |
239 | { |
240 | return AtomicValue::Ptr(new DerivedString(value)); |
241 | } |
242 | |
243 | /** |
244 | * Constructs an instance from the lexical |
245 | * representation @p lexical. |
246 | */ |
247 | static AtomicValue::Ptr fromLexical(const NamePool::Ptr &np, const QString &lexical) |
248 | { |
249 | switch(DerivedType) |
250 | { |
251 | case TypeString: |
252 | return AtomicValue::Ptr(new DerivedString(lexical)); |
253 | case TypeNormalizedString: |
254 | return AtomicValue::Ptr(new DerivedString(attributeNormalize(input: lexical))); |
255 | case TypeToken: |
256 | return AtomicValue::Ptr(new DerivedString(lexical.simplified())); |
257 | case TypeLanguage: |
258 | { |
259 | const QString simplified(lexical.trimmed()); |
260 | |
261 | QRegExp validate(QLatin1String("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*" )); |
262 | Q_ASSERT(validate.isValid()); |
263 | |
264 | if(validate.exactMatch(str: simplified)) |
265 | return AtomicValue::Ptr(new DerivedString(lexical.simplified())); |
266 | else |
267 | return error(np, invalidValue: simplified); |
268 | } |
269 | case TypeNMTOKEN: |
270 | { |
271 | const QString trimmed(lexical.trimmed()); |
272 | |
273 | if(isValidNMTOKEN(input: trimmed)) |
274 | return AtomicValue::Ptr(new DerivedString(trimmed)); |
275 | else |
276 | return error(np, invalidValue: trimmed); |
277 | } |
278 | case TypeName: |
279 | { |
280 | const QString simplified(lexical.simplified()); |
281 | |
282 | if(isValidName(input: simplified)) |
283 | return AtomicValue::Ptr(new DerivedString(simplified)); |
284 | else |
285 | return error(np, invalidValue: simplified); |
286 | } |
287 | case TypeID: |
288 | case TypeIDREF: |
289 | case TypeENTITY: |
290 | case TypeNCName: |
291 | { |
292 | /* We treat xs:ID, xs:ENTITY, xs:IDREF and xs:NCName in the exact same |
293 | * way, except for the type annotation. |
294 | * |
295 | * We use trimmed() instead of simplified() because it's |
296 | * faster and whitespace isn't allowed between |
297 | * non-whitespace characters anyway, for these types. */ |
298 | const QString trimmed(lexical.trimmed()); |
299 | |
300 | if(QXmlUtils::isNCName(ncName: trimmed)) |
301 | return AtomicValue::Ptr(new DerivedString(trimmed)); |
302 | else |
303 | return error(np, invalidValue: trimmed); |
304 | } |
305 | default: |
306 | { |
307 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached." ); |
308 | return AtomicValue::Ptr(); |
309 | } |
310 | } |
311 | } |
312 | |
313 | virtual QString stringValue() const |
314 | { |
315 | return m_value; |
316 | } |
317 | |
318 | virtual bool evaluateEBV(const QExplicitlySharedDataPointer<DynamicContext> &) const |
319 | { |
320 | return m_value.length() > 0; |
321 | } |
322 | |
323 | virtual ItemType::Ptr type() const |
324 | { |
325 | return itemType(); |
326 | } |
327 | }; |
328 | } |
329 | |
330 | QT_END_NAMESPACE |
331 | |
332 | #endif |
333 | |