1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2015 The Qt Company Ltd. |
4 | ** Contact: http://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtVersit module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL21$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see http://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at http://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 2.1 or version 3 as published by the Free |
20 | ** Software Foundation and appearing in the file LICENSE.LGPLv21 and |
21 | ** LICENSE.LGPLv3 included in the packaging of this file. Please review the |
22 | ** following information to ensure the GNU Lesser General Public License |
23 | ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and |
24 | ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
25 | ** |
26 | ** As a special exception, The Qt Company gives you certain additional |
27 | ** rights. These rights are described in The Qt Company LGPL Exception |
28 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
29 | ** |
30 | ** $QT_END_LICENSE$ |
31 | ** |
32 | ****************************************************************************/ |
33 | |
34 | #include "qversitreader_p.h" |
35 | |
36 | #include <QtCore/qbuffer.h> |
37 | #include <QtCore/qtextcodec.h> |
38 | #include <QtCore/qvariant.h> |
39 | |
40 | #include "qversitutils_p.h" |
41 | |
42 | QT_BEGIN_NAMESPACE_VERSIT |
43 | |
44 | // Some big enough value for nested versit documents to prevent infinite recursion |
45 | #define MAX_VERSIT_DOCUMENT_NESTING_DEPTH 20 |
46 | |
47 | QHash<QPair<QVersitDocument::VersitType,QString>, QVersitProperty::ValueType>* |
48 | QVersitReaderPrivate::mValueTypeMap = 0; |
49 | |
50 | /*! |
51 | \class LineReader |
52 | \brief The LineReader class is a wrapper around a QIODevice that allows line-by-line reading. |
53 | \internal |
54 | |
55 | This class keeps an internal buffer which it uses to temporarily store data which it has read from |
56 | the device but not returned to the user. |
57 | |
58 | The isCodecCertain() getter tells the client whether the codec in use is known for sure (it was |
59 | supplied by the caller or detected from the input), or whether it is only a guess. |
60 | */ |
61 | |
62 | /*! |
63 | Constructs a LineReader that reads from the given \a device using the given \a codec. |
64 | If the \a codec is null, it is guessed at by sniffing the first few bytes of the input. |
65 | */ |
66 | LineReader::LineReader(QIODevice* device, QTextCodec *codec) |
67 | : mDevice(device), |
68 | mCodec(codec), |
69 | mIsCodecUtf8Compatible(false), |
70 | mChunkSize(10000), // Read 10kB at a time |
71 | mOdometer(0), |
72 | mSearchFrom(0) |
73 | { |
74 | if (!mCodec) { |
75 | static QTextCodec* utf16be = QTextCodec::codecForName(name: "UTF-16BE" ); |
76 | static QTextCodec* utf16le = QTextCodec::codecForName(name: "UTF-16LE" ); |
77 | static QTextCodec* utf32be = QTextCodec::codecForName(name: "UTF-32BE" ); |
78 | static QTextCodec* utf32le = QTextCodec::codecForName(name: "UTF-32LE" ); |
79 | static const QByteArray beginUtf16be(VersitUtils::encode(ba: "BEGIN:" , codec: utf16be)); |
80 | static const QByteArray beginUtf16le(VersitUtils::encode(ba: "BEGIN:" , codec: utf16le)); |
81 | static const QByteArray beginUtf32be(VersitUtils::encode(ba: "BEGIN:" , codec: utf32be)); |
82 | static const QByteArray beginUtf32le(VersitUtils::encode(ba: "BEGIN:" , codec: utf32le)); |
83 | |
84 | // Do some basic charset detection using the byte-order-mark (BOM) |
85 | // We need 4 bytes to do BOM sniffing for UTF-32, UTF-16 and UTF-8 |
86 | QByteArray firstSixBytes = mDevice->read(maxlen: 6); |
87 | mCodec = QTextCodec::codecForUtfText(ba: firstSixBytes, NULL); |
88 | if (mCodec) { |
89 | mIsCodecCertain = true; |
90 | } else { |
91 | if (beginUtf16be.startsWith(a: firstSixBytes)) { |
92 | mCodec = utf16be; |
93 | mIsCodecCertain = true; |
94 | } else if (beginUtf16le.startsWith(a: firstSixBytes)) { |
95 | mCodec = utf16le; |
96 | mIsCodecCertain = true; |
97 | } else if (beginUtf32be.startsWith(a: firstSixBytes)) { |
98 | mCodec = utf32be; |
99 | mIsCodecCertain = true; |
100 | } else if (beginUtf32le.startsWith(a: firstSixBytes)) { |
101 | mCodec = utf32le; |
102 | mIsCodecCertain = true; |
103 | } else { |
104 | mCodec = QTextCodec::codecForLocale(); |
105 | mIsCodecCertain = false; |
106 | mIsCodecUtf8Compatible = true; |
107 | } |
108 | } |
109 | mBuffer = LByteArray(firstSixBytes, 0, 0); |
110 | } else { |
111 | mIsCodecCertain = true; |
112 | } |
113 | mCrlfList = *VersitUtils::newlineList(codec: mCodec); |
114 | } |
115 | |
116 | /*! |
117 | Constructs a LineReader that reads from the given \a device using the given \a codec. |
118 | \a chunkSize is the number of bytes to read at a time (it is useful for testing but this |
119 | constructor shouldn't otherwise be used). |
120 | */ |
121 | LineReader::LineReader(QIODevice* device, QTextCodec *codec, int chunkSize) |
122 | : mDevice(device), |
123 | mCodec(codec), |
124 | mIsCodecCertain(true), |
125 | mChunkSize(chunkSize), |
126 | mCrlfList(*VersitUtils::newlineList(codec: mCodec)), |
127 | mOdometer(0), |
128 | mSearchFrom(0) |
129 | { |
130 | Q_ASSERT(mCodec != NULL); |
131 | } |
132 | |
133 | /*! |
134 | Attempts to read a line and returns an LByteArray containing the line. |
135 | This wraps around readOneLine and provides a hack to do additional unwrapping for a malformed |
136 | vCard where a space is not added to the start of the line continuation. |
137 | |
138 | Some malformed vCards we get look like this: (Case 1) |
139 | ORG:A |
140 | B |
141 | C |
142 | (CRLF-SPACE wrapping is employed for the first time, then the space is subsequently omitted). |
143 | But a valid vCard can be weirdly wrapped without the CRLF-SPACE, if it's quoted-printable and |
144 | ends in an equals, eg. (Case 2) |
145 | ORG;ENCODING=QUOTED-PRINTABLE:A= |
146 | B= |
147 | C |
148 | Unwrap in Case 1 but not in Case 2 - leave that for the QP-decoder in QVR::unencode |
149 | */ |
150 | LByteArray LineReader::readLine() |
151 | { |
152 | QByteArray colon(VersitUtils::encode(ch: ':', codec: mCodec)); |
153 | QByteArray equals(VersitUtils::encode(ch: '=', codec: mCodec)); |
154 | if (!mPushedLines.isEmpty()) { |
155 | LByteArray retval(mPushedLines.pop()); |
156 | return retval; |
157 | } |
158 | readOneLine(cursor: &mBuffer); |
159 | // Hack: read the next line and see if it's a continuation of this line |
160 | while (true) { |
161 | int prevStart = mBuffer.mStart; |
162 | int prevEnd = mBuffer.mEnd; |
163 | // readOneLine only appends to mBuffer so these saved offsets should remain valid |
164 | readOneLine(cursor: &mBuffer); |
165 | |
166 | // Get an LByteArray of the previous line. This should be fast because copying the |
167 | // LByteArray copies the QByteArray, which is implicitly shared |
168 | LByteArray prevLine(mBuffer.mData, prevStart, prevEnd); |
169 | if (mBuffer.isEmpty() |
170 | || mBuffer.contains(ba: colon) |
171 | || prevLine.endsWith(ba: equals)) { |
172 | // Normal, the next line is empty, or a new property, or it's been wrapped using |
173 | // QUOTED-PRINTABLE. Rewind it back one line so it gets read next time round. |
174 | mBuffer.setBounds(start: prevStart, end: prevEnd); |
175 | break; |
176 | } else { |
177 | // Some silly vCard generator has probably wrapped a line without prepending a space |
178 | // Join the previous line with this line by deleting the characters between prevEnd and |
179 | // mStart (eg. any newline characters) |
180 | int crlfLen = mBuffer.mStart-prevEnd; |
181 | mBuffer.mData.remove(index: prevEnd, len: crlfLen); |
182 | mBuffer.setBounds(start: prevStart, end: mBuffer.mEnd - crlfLen); |
183 | } |
184 | } |
185 | mBuffer.dropOldData(); |
186 | mOdometer += mBuffer.size(); |
187 | return mBuffer; |
188 | } |
189 | |
190 | /*! |
191 | Attempts to read a line and updates \a cursor to contain the line. This performes basic |
192 | line unwrapping as per the vCard specification (eg. if a line begins with a space, it is a |
193 | continuation of the next line) |
194 | */ |
195 | void LineReader::readOneLine(LByteArray* cursor) { |
196 | cursor->mStart = cursor->mEnd; |
197 | mSearchFrom = cursor->mStart; |
198 | |
199 | // First, look for a newline in the already-existing buffer. If found, return the line. |
200 | if (tryReadLine(cursor, atEnd: false)) { |
201 | return; |
202 | } |
203 | |
204 | // Otherwise, keep reading more data until either a CRLF is found, or there's no more to read. |
205 | while (!mDevice->atEnd()) { |
206 | QByteArray temp = mDevice->read(maxlen: mChunkSize); |
207 | if (!temp.isEmpty()) { |
208 | cursor->mData.append(a: temp); |
209 | if (tryReadLine(cursor, atEnd: false)) |
210 | return; |
211 | } else { |
212 | mDevice->waitForReadyRead(msecs: 500); |
213 | } |
214 | } |
215 | |
216 | // We've reached the end of the stream. Find a newline from the buffer (or return what's left). |
217 | tryReadLine(cursor, atEnd: true); |
218 | return; |
219 | } |
220 | |
221 | /*! |
222 | Push a line onto the front of the line reader so it will be returned on the next call to readLine(). |
223 | If multiple lines are pushed onto a line reader, they are read back in first-in-last-out order |
224 | */ |
225 | void LineReader::pushLine(const QByteArray& line) |
226 | { |
227 | mPushedLines.push(t: line); |
228 | } |
229 | |
230 | /*! |
231 | How many bytes have been returned in the LByteArray in the lifetime of the LineReader. |
232 | */ |
233 | int LineReader::odometer() const |
234 | { |
235 | return mOdometer; |
236 | } |
237 | |
238 | /*! |
239 | Returns true if there are no more lines left for readLine() to return. It is possible for atEnd() |
240 | to return false and for there to be no more data left (eg. if there are trailing newlines at the |
241 | end of the input. In this case, readLine() will return an empty line. |
242 | */ |
243 | bool LineReader::atEnd() const |
244 | { |
245 | return mPushedLines.isEmpty() && mDevice->atEnd() && mBuffer.mEnd == mBuffer.mData.size(); |
246 | } |
247 | |
248 | /*! |
249 | Returns the codec that the LineReader reads with. |
250 | */ |
251 | QTextCodec* LineReader::codec() const |
252 | { |
253 | return mCodec; |
254 | } |
255 | |
256 | /*! |
257 | Returns true if the line reader has been told for sure what the codec is, or if a byte-order-mark |
258 | has told us for sure what the codec is. |
259 | */ |
260 | bool LineReader::isCodecCertain() const |
261 | { |
262 | return mIsCodecCertain; |
263 | } |
264 | |
265 | /*! Valid if isCodecCertain(), false iff we've seen an invalid utf8 sequence */ |
266 | bool LineReader::isCodecUtf8Compatible() const { |
267 | return mIsCodecUtf8Compatible; |
268 | } |
269 | |
270 | void LineReader::setCodecUtf8Incompatible() { |
271 | mIsCodecUtf8Compatible = false; |
272 | } |
273 | |
274 | /*! |
275 | * Get the next line of input from the device to parse. Also performs unfolding by removing |
276 | * sequences of newline-space from the retrieved line. Skips over any newlines at the start of the |
277 | * input. |
278 | * |
279 | * \a cursor is filled with a the line |
280 | * \a atEnd is true if we've reached the end of the stream |
281 | * Returns true if a line was completely read (ie. a newline character was found) |
282 | */ |
283 | bool LineReader::tryReadLine(LByteArray *cursor, bool atEnd) |
284 | { |
285 | int crlfPos = -1; |
286 | int doubleCrLfCheck = -1; |
287 | QByteArray space(VersitUtils::encode(ch: ' ', codec: mCodec)); |
288 | QByteArray tab(VersitUtils::encode(ch: '\t', codec: mCodec)); |
289 | QByteArray equals(VersitUtils::encode(ch: '=', codec: mCodec)); |
290 | |
291 | int spaceLength = space.length(); |
292 | int equalsLength = equals.length(); |
293 | |
294 | forever { |
295 | foreach(const QByteArrayMatcher& crlf, mCrlfList) { |
296 | int crlfLength = crlf.pattern().length(); |
297 | crlfPos = crlf.indexIn(ba: cursor->mData, from: mSearchFrom); |
298 | doubleCrLfCheck = crlf.indexIn(ba: cursor->mData, from: mSearchFrom + crlfLength); |
299 | if ((crlfPos == cursor->mStart) && (doubleCrLfCheck != crlfPos + crlfLength)) { |
300 | // Single Newline at start of line. Ignore and Set mStart to directly after it. |
301 | cursor->mStart += crlfLength; |
302 | mSearchFrom = cursor->mStart; |
303 | break; |
304 | } else if ((crlfPos == cursor->mStart) && (doubleCrLfCheck == crlfPos + crlfLength)) { |
305 | // Found '=CrLfCrLf' - We choose to see this as badly formed, |
306 | // but clear end of the versit property. |
307 | cursor->mData.remove(index: crlfPos, len: crlfLength); |
308 | cursor->mEnd = crlfPos; |
309 | if (QVersitReaderPrivate::containsAt(text: cursor->mData, match: equals, index: crlfPos - equalsLength) ) { |
310 | cursor->mData.remove(index: crlfPos -1, len: 1); |
311 | } |
312 | return true; |
313 | } else if (crlfPos > cursor->mStart) { |
314 | // Found the first occurance of CRLF in the current buffer. |
315 | if (QVersitReaderPrivate::containsAt(text: cursor->mData, match: space, index: crlfPos + crlfLength) |
316 | || QVersitReaderPrivate::containsAt(text: cursor->mData, match: tab, index: crlfPos + crlfLength)) { |
317 | // If it's followed by whitespace, collapse it. |
318 | cursor->mData.remove(index: crlfPos, len: crlfLength + spaceLength); |
319 | mSearchFrom = crlfPos; |
320 | break; |
321 | } else if (!atEnd && crlfPos + crlfLength + spaceLength >= cursor->mData.size()) { |
322 | // If our CRLF is at the end of the current buffer but there's more to read, |
323 | // it's possible that a space could be hiding on the next read from the device. |
324 | // Just pretend we didn't see the CRLF and pick it up the next time round. |
325 | mSearchFrom = crlfPos; |
326 | return false; |
327 | } else { |
328 | // Found the CRLF. |
329 | // Hack: if malformed vCard files (having no \r\n or \r\n\r\n ending) are |
330 | // concatenated, we can get a malformed line in the document which looks like: |
331 | // END:VCARDBEGIN:VCARD |
332 | // In that situation, we should actually insert the \r\n sequence manually, |
333 | // and return mEnd after the END:VCARD\r\n position. |
334 | QByteArray cr(VersitUtils::encode(ch: '\r', codec: mCodec)); |
335 | QByteArray lf(VersitUtils::encode(ch: '\n', codec: mCodec)); |
336 | QByteArray ev(VersitUtils::encode(ba: QByteArray("END:VCARD" ), codec: mCodec)); |
337 | QByteArray evbv(VersitUtils::encode(ba: QByteArray("END:VCARDBEGIN:VCARD" ), codec: mCodec)); |
338 | QByteArray evcrlf(VersitUtils::encode(ba: QByteArray("END:VCARD\r\n" ), codec: mCodec)); |
339 | int crSz = cr.size(); |
340 | int lfSz = lf.size(); |
341 | int evSz = ev.size(); |
342 | int evcrlfSz = evcrlf.size(); |
343 | |
344 | QByteArray possiblyMalformedLine = cursor->mData.mid(index: cursor->mStart, len: crlfPos-cursor->mStart); |
345 | int pmlEnd = possiblyMalformedLine.size() - 1; |
346 | while (true) { |
347 | if (QVersitReaderPrivate::containsAt(text: possiblyMalformedLine, match: cr, index: pmlEnd - crSz)) { |
348 | possiblyMalformedLine.chop(n: crSz); |
349 | } else if (QVersitReaderPrivate::containsAt(text: possiblyMalformedLine, match: lf, index: pmlEnd - lfSz)) { |
350 | possiblyMalformedLine.chop(n: lfSz); |
351 | } else { |
352 | break; |
353 | } |
354 | } |
355 | if (possiblyMalformedLine == evbv) { |
356 | // fix up the malformed line, return the end cursor after it. |
357 | cursor->mData.replace(index: cursor->mStart, len: evSz, s: evcrlf); |
358 | cursor->mEnd = cursor->mStart+evcrlfSz; |
359 | return true; |
360 | } else { |
361 | // A well-formed line. |
362 | cursor->mEnd = crlfPos; |
363 | return true; |
364 | } |
365 | } |
366 | } |
367 | } |
368 | if (crlfPos == -1) { |
369 | // No CRLF found. |
370 | cursor->mEnd = cursor->mData.size(); |
371 | // Next time, continue searching from here. |
372 | // The largest CRLF will have a size of 8 bytes, so we should backtrack 8 bytes |
373 | mSearchFrom = qMax(a: mSearchFrom, b: cursor->mEnd-8); |
374 | return false; |
375 | } |
376 | } |
377 | } |
378 | |
379 | /*! Links the signals from this to the signals of \a reader. */ |
380 | void QVersitReaderPrivate::init(QVersitReader* reader) |
381 | { |
382 | qRegisterMetaType<QVersitReader::State>(typeName: "QVersitReader::State" ); |
383 | connect(sender: this, SIGNAL(stateChanged(QVersitReader::State)), |
384 | receiver: reader, SIGNAL(stateChanged(QVersitReader::State)),Qt::DirectConnection); |
385 | connect(sender: this, SIGNAL(resultsAvailable()), |
386 | receiver: reader, SIGNAL(resultsAvailable()), Qt::DirectConnection); |
387 | } |
388 | |
389 | /*! Construct a reader. */ |
390 | QVersitReaderPrivate::QVersitReaderPrivate() |
391 | : mIoDevice(0), |
392 | mDocumentNestingLevel(0), |
393 | mDefaultCodec(0), |
394 | mState(QVersitReader::InactiveState), |
395 | mError(QVersitReader::NoError), |
396 | mIsCanceling(false) |
397 | { |
398 | } |
399 | |
400 | /*! Destroy a reader. */ |
401 | QVersitReaderPrivate::~QVersitReaderPrivate() |
402 | { |
403 | } |
404 | |
405 | QHash<QPair<QVersitDocument::VersitType,QString>, QVersitProperty::ValueType>* |
406 | QVersitReaderPrivate::valueTypeMap() { |
407 | if (mValueTypeMap == 0) { |
408 | mValueTypeMap = new QHash<QPair<QVersitDocument::VersitType,QString>, QVersitProperty::ValueType>(); |
409 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "AGENT" )), |
410 | avalue: QVersitProperty::VersitDocumentType); |
411 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "AGENT" )), |
412 | avalue: QVersitProperty::VersitDocumentType); |
413 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "AGENT" )), |
414 | avalue: QVersitProperty::VersitDocumentType); |
415 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "N" )), |
416 | avalue: QVersitProperty::CompoundType); |
417 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "N" )), |
418 | avalue: QVersitProperty::CompoundType); |
419 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "N" )), |
420 | avalue: QVersitProperty::CompoundType); |
421 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "ADR" )), |
422 | avalue: QVersitProperty::CompoundType); |
423 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "ADR" )), |
424 | avalue: QVersitProperty::CompoundType); |
425 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "ADR" )), |
426 | avalue: QVersitProperty::CompoundType); |
427 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "GEO" )), |
428 | avalue: QVersitProperty::CompoundType); |
429 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "GEO" )), |
430 | avalue: QVersitProperty::CompoundType); |
431 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "GEO" )), |
432 | avalue: QVersitProperty::CompoundType); |
433 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "ORG" )), |
434 | avalue: QVersitProperty::CompoundType); |
435 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "ORG" )), |
436 | avalue: QVersitProperty::CompoundType); |
437 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "ORG" )), |
438 | avalue: QVersitProperty::CompoundType); |
439 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "NICKNAME" )), |
440 | avalue: QVersitProperty::ListType); |
441 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "NICKNAME" )), |
442 | avalue: QVersitProperty::ListType); |
443 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "NICKNAME" )), |
444 | avalue: QVersitProperty::ListType); |
445 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "CATEGORIES" )), |
446 | avalue: QVersitProperty::ListType); |
447 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "CATEGORIES" )), |
448 | avalue: QVersitProperty::ListType); |
449 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "CATEGORIES" )), |
450 | avalue: QVersitProperty::ListType); |
451 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-CHILDREN" )), |
452 | avalue: QVersitProperty::ListType); |
453 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-CHILDREN" )), |
454 | avalue: QVersitProperty::ListType); |
455 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-CHILDREN" )), |
456 | avalue: QVersitProperty::ListType); |
457 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-NICKNAME" )), |
458 | avalue: QVersitProperty::ListType); |
459 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-NICKNAME" )), |
460 | avalue: QVersitProperty::ListType); |
461 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-NICKNAME" )), |
462 | avalue: QVersitProperty::ListType); |
463 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-QTPROJECT-EXTENDED-DETAIL" )), |
464 | avalue: QVersitProperty::CompoundType); |
465 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-QTPROJECT-EXTENDED-DETAIL" )), |
466 | avalue: QVersitProperty::CompoundType); |
467 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-QTPROJECT-EXTENDED-DETAIL" )), |
468 | avalue: QVersitProperty::CompoundType); |
469 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::ICalendar20Type, y: QString::fromLatin1(str: "X-QTPROJECT-EXTENDED-DETAIL" )), |
470 | avalue: QVersitProperty::CompoundType); |
471 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-QTPROJECT-FAVORITE" )), |
472 | avalue: QVersitProperty::CompoundType); |
473 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-QTPROJECT-FAVORITE" )), |
474 | avalue: QVersitProperty::CompoundType); |
475 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-QTPROJECT-FAVORITE" )), |
476 | avalue: QVersitProperty::CompoundType); |
477 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-QTPROJECT-VERSION" )), |
478 | avalue: QVersitProperty::CompoundType); |
479 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-QTPROJECT-VERSION" )), |
480 | avalue: QVersitProperty::CompoundType); |
481 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-QTPROJECT-VERSION" )), |
482 | avalue: QVersitProperty::CompoundType); |
483 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::ICalendar20Type, y: QString::fromLatin1(str: "X-QTPROJECT-VERSION" )), |
484 | avalue: QVersitProperty::CompoundType); |
485 | |
486 | // Some MeeGo specific types, for EDS/SyncEvolution roundtripping until the API allows |
487 | // better control over the type of custom properties. |
488 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-EDS-QTCONTACTS" )), |
489 | avalue: QVersitProperty::CompoundType); |
490 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-EDS-QTCONTACTS" )), |
491 | avalue: QVersitProperty::CompoundType); |
492 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-EDS-QTCONTACTS" )), |
493 | avalue: QVersitProperty::CompoundType); |
494 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard21Type, y: QString::fromLatin1(str: "X-SYNCEVO-QTCONTACTS" )), |
495 | avalue: QVersitProperty::CompoundType); |
496 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard30Type, y: QString::fromLatin1(str: "X-SYNCEVO-QTCONTACTS" )), |
497 | avalue: QVersitProperty::CompoundType); |
498 | mValueTypeMap->insert(akey: qMakePair(x: QVersitDocument::VCard40Type, y: QString::fromLatin1(str: "X-SYNCEVO-QTCONTACTS" )), |
499 | avalue: QVersitProperty::CompoundType); |
500 | |
501 | } |
502 | return mValueTypeMap; |
503 | } |
504 | |
505 | /*! |
506 | * Inherited from QThread, called by QThread when the thread has been started. |
507 | */ |
508 | void QVersitReaderPrivate::run() |
509 | { |
510 | read(); |
511 | } |
512 | |
513 | /*! |
514 | * Does the actual reading and sets the error and state as appropriate. |
515 | * If \a async, then stateChanged() signals are emitted as the reading happens. |
516 | */ |
517 | void QVersitReaderPrivate::read() |
518 | { |
519 | mMutex.lock(); |
520 | mVersitDocuments.clear(); |
521 | mMutex.unlock(); |
522 | bool canceled = false; |
523 | |
524 | LineReader lineReader(mIoDevice, mDefaultCodec); |
525 | while(!lineReader.atEnd()) { |
526 | if (isCanceling()) { |
527 | canceled = true; |
528 | break; |
529 | } |
530 | QVersitDocument document; |
531 | int oldPos = lineReader.odometer(); |
532 | bool ok = parseVersitDocument(lineReader: &lineReader, document: &document); |
533 | |
534 | if (ok) { |
535 | if (document.isEmpty()) |
536 | break; |
537 | else { |
538 | QMutexLocker locker(&mMutex); |
539 | mVersitDocuments.append(t: document); |
540 | emit resultsAvailable(); |
541 | } |
542 | } else { |
543 | setError(QVersitReader::ParseError); |
544 | if (lineReader.odometer() == oldPos) |
545 | break; |
546 | } |
547 | }; |
548 | if (canceled) |
549 | setState(QVersitReader::CanceledState); |
550 | else |
551 | setState(QVersitReader::FinishedState); |
552 | } |
553 | |
554 | void QVersitReaderPrivate::setState(QVersitReader::State state) |
555 | { |
556 | mMutex.lock(); |
557 | mState = state; |
558 | mMutex.unlock(); |
559 | emit stateChanged(state); |
560 | } |
561 | |
562 | QVersitReader::State QVersitReaderPrivate::state() const |
563 | { |
564 | QMutexLocker locker(&mMutex); |
565 | return mState; |
566 | } |
567 | |
568 | void QVersitReaderPrivate::setError(QVersitReader::Error error) |
569 | { |
570 | QMutexLocker locker(&mMutex); |
571 | mError = error; |
572 | } |
573 | |
574 | QVersitReader::Error QVersitReaderPrivate::error() const |
575 | { |
576 | QMutexLocker locker(&mMutex); |
577 | return mError; |
578 | } |
579 | |
580 | void QVersitReaderPrivate::setCanceling(bool canceling) |
581 | { |
582 | QMutexLocker locker(&mMutex); |
583 | mIsCanceling = canceling; |
584 | } |
585 | |
586 | bool QVersitReaderPrivate::isCanceling() |
587 | { |
588 | QMutexLocker locker(&mMutex); |
589 | return mIsCanceling; |
590 | } |
591 | |
592 | /*! |
593 | * Parses a versit document. Returns true if the parsing was successful. |
594 | */ |
595 | bool QVersitReaderPrivate::parseVersitDocument(LineReader* lineReader, QVersitDocument* document) |
596 | { |
597 | if (mDocumentNestingLevel >= MAX_VERSIT_DOCUMENT_NESTING_DEPTH) |
598 | return false; // To prevent infinite recursion |
599 | |
600 | // If we don't know what type it is, just assume it's a vCard 3.0 |
601 | if (document->type() == QVersitDocument::InvalidType) |
602 | document->setType(QVersitDocument::VCard30Type); |
603 | |
604 | QVersitProperty property; |
605 | |
606 | property = parseNextVersitProperty(versitType: document->type(), lineReader); |
607 | QString propertyValue = property.value().trimmed().toUpper(); |
608 | if (property.isEmpty()) { |
609 | // A blank document (or end of file) was found. |
610 | document->clear(); |
611 | return true; |
612 | } else if (property.name() == QStringLiteral("BEGIN" )) { |
613 | if (propertyValue == QStringLiteral("VCARD" )) { |
614 | document->setComponentType(propertyValue); |
615 | } else if (propertyValue == QStringLiteral("VCALENDAR" )) { |
616 | document->setType(QVersitDocument::ICalendar20Type); |
617 | document->setComponentType(propertyValue); |
618 | } else { |
619 | // Unknown document type |
620 | document->clear(); |
621 | return false; |
622 | } |
623 | } else { |
624 | // Some property other than BEGIN was found. |
625 | document->clear(); |
626 | return false; |
627 | } |
628 | |
629 | return parseVersitDocumentBody(lineReader, document); |
630 | } |
631 | |
632 | /*! Parse the rest of a versit document after finding a BEGIN line */ |
633 | bool QVersitReaderPrivate::parseVersitDocumentBody(LineReader* lineReader, QVersitDocument* document) |
634 | { |
635 | mDocumentNestingLevel++; |
636 | bool parsingOk = true; |
637 | while (true) { |
638 | /* Grab it */ |
639 | QVersitProperty property = parseNextVersitProperty(versitType: document->type(), lineReader); |
640 | |
641 | if (property.name() == QStringLiteral("BEGIN" )) { |
642 | // Nested Versit document |
643 | QVersitDocument subDocument; |
644 | subDocument.setType(document->type()); // the nested document inherits the parent's type |
645 | subDocument.setComponentType(property.value().trimmed().toUpper()); |
646 | if (!parseVersitDocumentBody(lineReader, document: &subDocument)) |
647 | break; |
648 | document->addSubDocument(subdocument: subDocument); |
649 | } else if (property.name() == QStringLiteral("VERSION" )) { |
650 | // A version property |
651 | if (!setVersionFromProperty(document, property)) { |
652 | parsingOk = false; |
653 | break; |
654 | } |
655 | } else if (property.name() == QStringLiteral("END" )) { |
656 | // End of document |
657 | break; |
658 | } else if (property.name().isEmpty()) { |
659 | // End of input or some other error |
660 | parsingOk = false; |
661 | break; |
662 | } else { |
663 | // A normal property - just add it. |
664 | document->addProperty(property); |
665 | } |
666 | } |
667 | if (!parsingOk) |
668 | document->clear(); |
669 | mDocumentNestingLevel--; |
670 | |
671 | return parsingOk; |
672 | } |
673 | |
674 | /*! |
675 | * Parses a versit document and returns whether parsing succeeded. |
676 | */ |
677 | QVersitProperty QVersitReaderPrivate::parseNextVersitProperty( |
678 | QVersitDocument::VersitType versitType, |
679 | LineReader* lineReader) |
680 | { |
681 | LByteArray line = lineReader->readLine(); |
682 | if (line.isEmpty()) |
683 | return QVersitProperty(); |
684 | |
685 | // Otherwise, do stuff. |
686 | QPair<QStringList,QString> groupsAndName = |
687 | extractPropertyGroupsAndName(line: &line, codec: lineReader->codec()); |
688 | |
689 | QVersitProperty property; |
690 | property.setGroups(groupsAndName.first); |
691 | property.setName(groupsAndName.second); |
692 | // set the propertyValueType |
693 | QPair<QVersitDocument::VersitType, QString> key = |
694 | qMakePair(x: versitType, y: property.name()); |
695 | if (valueTypeMap()->contains(akey: key)) |
696 | property.setValueType(valueTypeMap()->value(akey: key)); |
697 | |
698 | if (versitType == QVersitDocument::VCard21Type) |
699 | parseVCard21Property(text: &line, property: &property, lineReader); |
700 | else if (versitType == QVersitDocument::VCard30Type |
701 | || versitType == QVersitDocument::VCard40Type |
702 | || versitType == QVersitDocument::ICalendar20Type) |
703 | parseVCard30Property(versitType, text: &line, property: &property, lineReader); |
704 | |
705 | return property; |
706 | } |
707 | |
708 | /*! |
709 | * Parses the property according to vCard 2.1 syntax. |
710 | */ |
711 | void QVersitReaderPrivate::parseVCard21Property(LByteArray* line, QVersitProperty* property, |
712 | LineReader* lineReader) |
713 | { |
714 | property->setParameters(extractVCard21PropertyParams(line, codec: lineReader->codec())); |
715 | |
716 | QByteArray value = line->toByteArray(); |
717 | if (property->valueType() == QVersitProperty::VersitDocumentType) { |
718 | // Hack to handle cases where start of document is on the same or next line as "AGENT:" |
719 | if (value == "BEGIN:VCARD" ) { |
720 | lineReader->pushLine(line: value); |
721 | } else if (value.isEmpty()) { |
722 | } else { |
723 | property->clear(); |
724 | return; |
725 | } |
726 | QVersitDocument subDocument(QVersitDocument::VCard21Type); |
727 | if (!parseVersitDocument(lineReader, document: &subDocument)) { |
728 | property->clear(); |
729 | } else { |
730 | property->setValue(QVariant::fromValue(value: subDocument)); |
731 | } |
732 | } else { |
733 | bool isBinary = unencode(value: &value, property, lineReader); |
734 | if (isBinary) { |
735 | property->setValue(value); |
736 | property->setValueType(QVersitProperty::BinaryType); |
737 | } |
738 | else { |
739 | QTextCodec* ignored = 0; |
740 | property->setValue(decodeCharset(value, property, lineReader, codec: &ignored)); |
741 | splitStructuredValue(property, hasEscapedBackslashes: false); |
742 | } |
743 | } |
744 | } |
745 | |
746 | /*! |
747 | * Parses the property according to vCard 3.0 syntax. This function is called for both vCard 3.0 |
748 | * and iCalendar properties. |
749 | */ |
750 | void QVersitReaderPrivate::parseVCard30Property(QVersitDocument::VersitType versitType, |
751 | LByteArray* line, QVersitProperty* property, |
752 | LineReader* lineReader) |
753 | { |
754 | property->setParameters(extractVCard30PropertyParams(line, codec: lineReader->codec())); |
755 | |
756 | QByteArray value = line->toByteArray(); |
757 | |
758 | |
759 | if (property->valueType() == QVersitProperty::VersitDocumentType) { |
760 | QTextCodec* codec; |
761 | QString valueString(decodeCharset(value, property, lineReader, codec: &codec)); |
762 | removeBackSlashEscaping(text: &valueString); |
763 | // Make a line reader from the value of the property. |
764 | QByteArray subDocumentValue(codec->fromUnicode(uc: valueString)); |
765 | QBuffer subDocumentData(&subDocumentValue); |
766 | subDocumentData.open(openMode: QIODevice::ReadOnly); |
767 | subDocumentData.seek(off: 0); |
768 | LineReader subDocumentLineReader(&subDocumentData, codec); |
769 | |
770 | // Recursive call! |
771 | QVersitDocument subDocument(versitType); |
772 | if (!parseVersitDocument(lineReader: &subDocumentLineReader, document: &subDocument)) { |
773 | property->clear(); |
774 | } else { |
775 | property->setValue(QVariant::fromValue(value: subDocument)); |
776 | } |
777 | } else { |
778 | bool isBinary = unencode(value: &value, property, lineReader); |
779 | if (isBinary) { |
780 | property->setValue(value); |
781 | property->setValueType(QVersitProperty::BinaryType); |
782 | } else { |
783 | QTextCodec* ignored = 0; |
784 | property->setValue(decodeCharset(value, property, lineReader, codec: &ignored)); |
785 | bool isList = splitStructuredValue(property, hasEscapedBackslashes: true); |
786 | // Do backslash unescaping |
787 | if (isList) { |
788 | QStringList list = property->value<QStringList>(); |
789 | for (int i = 0; i < list.length(); i++) { |
790 | removeBackSlashEscaping(text: &list[i]); |
791 | } |
792 | property->setValue(list); |
793 | } else { |
794 | QString value = property->value(); |
795 | removeBackSlashEscaping(text: &value); |
796 | property->setValue(value); |
797 | } |
798 | } |
799 | } |
800 | } |
801 | |
802 | /*! |
803 | * Sets version to \a document if \a property contains a supported version. |
804 | */ |
805 | bool QVersitReaderPrivate::setVersionFromProperty(QVersitDocument* document, const QVersitProperty& property) const |
806 | { |
807 | QString value = property.value().trimmed(); |
808 | if (document->componentType() == QStringLiteral("VCARD" ) |
809 | && value == QStringLiteral("2.1" )) { |
810 | document->setType(QVersitDocument::VCard21Type); |
811 | } else if (document->componentType() == QStringLiteral("VCARD" ) |
812 | && value == QStringLiteral("3.0" )) { |
813 | document->setType(QVersitDocument::VCard30Type); |
814 | } else if (document->componentType() == QStringLiteral("VCARD" ) |
815 | && value == QStringLiteral("4.0" )) { |
816 | document->setType(QVersitDocument::VCard40Type); |
817 | } else if ((document->componentType() == QStringLiteral("VCALENDAR" ) |
818 | || document->type() == QVersitDocument::ICalendar20Type) // covers VEVENT, etc. when nested inside a VCALENDAR |
819 | && value == QStringLiteral("2.0" )) { |
820 | document->setType(QVersitDocument::ICalendar20Type); |
821 | } else { |
822 | return false; |
823 | } |
824 | return true; |
825 | } |
826 | |
827 | /*! |
828 | * On entry, \a value should be the byte array to unencode. It is modified to be the unencoded |
829 | * version. Returns true if and only if the value was base-64 encoded. (This is used as a |
830 | * heuristic later to decide whether to decode the byte array as text) |
831 | * \a lineReader is supplied in case more lines need to be read (for quoted-printable). The |
832 | * \a property is supplied so we know what kind of encoding was used. |
833 | */ |
834 | bool QVersitReaderPrivate::unencode(QByteArray* value, |
835 | QVersitProperty* property, |
836 | LineReader* lineReader) const |
837 | { |
838 | QStringList encodingParameters = property->parameters().values(QStringLiteral("ENCODING" )); |
839 | QStringList typeParameters = property->parameters().values(QStringLiteral("TYPE" )); |
840 | if (encodingParameters.contains(QStringLiteral("QUOTED-PRINTABLE" ), cs: Qt::CaseInsensitive)) { |
841 | // At this point, we need to accumulate bytes until we hit a real line break (no = before |
842 | // it) value already contains everything up to the character before the newline |
843 | while (value->endsWith(c: '=')) { |
844 | value->chop(n: 1); // Get rid of '=' |
845 | // We add each line (minus the escaped = and newline chars) |
846 | value->append(a: lineReader->readLine().toByteArray()); |
847 | } |
848 | decodeQuotedPrintable(text: value); |
849 | // Remove the encoding parameter as the value is now decoded |
850 | property->removeParameters(QStringLiteral("ENCODING" )); |
851 | return false; |
852 | } else if (encodingParameters.contains(QStringLiteral("BASE64" ), cs: Qt::CaseInsensitive) |
853 | || encodingParameters.contains(QStringLiteral("B" ), cs: Qt::CaseInsensitive) |
854 | || typeParameters.contains(QStringLiteral("BASE64" ), cs: Qt::CaseInsensitive) |
855 | || typeParameters.contains(QStringLiteral("B" ), cs: Qt::CaseInsensitive)) { |
856 | *value = QByteArray::fromBase64(base64: *value); |
857 | // Remove the encoding parameter as the value is now decoded |
858 | property->removeParameters(QStringLiteral("ENCODING" )); |
859 | return true; |
860 | } |
861 | return false; |
862 | } |
863 | |
864 | /*! |
865 | * Decodes \a value, after working out what charset it is in using the context of \a property and |
866 | * returns it. The codec used to decode is returned in \a codec. If the CHARSET parameter was |
867 | * specified, *charsetSpecified is set to true (else, false). |
868 | */ |
869 | QString QVersitReaderPrivate::decodeCharset(const QByteArray& value, |
870 | QVersitProperty* property, |
871 | LineReader* lineReader, |
872 | QTextCodec** codec) const |
873 | { |
874 | static const QString charset(QStringLiteral("CHARSET" )); |
875 | |
876 | *codec = NULL; |
877 | if (property->parameters().contains(akey: charset)) { |
878 | QString charsetValue = *property->parameters().find(akey: charset); |
879 | property->removeParameters(name: charset); |
880 | *codec = QTextCodec::codecForName(name: charsetValue.toLatin1()); |
881 | } else if (!lineReader->isCodecCertain() |
882 | && lineReader->isCodecUtf8Compatible()) { |
883 | // Guess the codec because we don't know for sure what it is and it could possibly be |
884 | // either UTF-8 or an 8-bit codec. |
885 | if (VersitUtils::isValidUtf8(bytes: value)) { |
886 | // Valid UTF-8 |
887 | *codec = QTextCodec::codecForName(name: "UTF-8" ); |
888 | } else { |
889 | // Invalid UTF-8 - don't try to test future properties for UTF-8-compatibility |
890 | lineReader->setCodecUtf8Incompatible(); |
891 | } |
892 | } |
893 | |
894 | if (*codec == NULL) |
895 | *codec = lineReader->codec(); |
896 | |
897 | return (*codec)->toUnicode(value); |
898 | } |
899 | |
900 | /*! |
901 | * Decodes Quoted-Printable encoded (RFC 1521) characters in /a text. |
902 | */ |
903 | void QVersitReaderPrivate::decodeQuotedPrintable(QByteArray* text) const |
904 | { |
905 | for (int i=0; i < text->length(); i++) { |
906 | char current = text->at(i); |
907 | if (current == '=' && i+2 < text->length()) { |
908 | char next = text->at(i: i+1); |
909 | char nextAfterNext = text->at(i: i+2); |
910 | if (((next >= 'a' && next <= 'f') || |
911 | (next >= 'A' && next <= 'F') || |
912 | (next >= '0' && next <= '9')) && |
913 | ((nextAfterNext >= 'a' && nextAfterNext <= 'f') || |
914 | (nextAfterNext >= 'A' && nextAfterNext <= 'F') || |
915 | (nextAfterNext >= '0' && nextAfterNext <= '9'))) { |
916 | bool ok; |
917 | char decodedChar(text->mid(index: i+1, len: 2).toInt(ok: &ok,base: 16)); |
918 | if (ok) { |
919 | (*text)[i] = decodedChar; |
920 | text->remove(index: i+1, len: 2); |
921 | } |
922 | } else if (next == '\r' && nextAfterNext == '\n') { |
923 | // Newlines can still be found here if they are encoded in a non-default charset. |
924 | text->remove(index: i, len: 3); |
925 | } |
926 | } |
927 | } |
928 | } |
929 | |
930 | /*! |
931 | * Extracts the groups and the name of the property using \a codec to determine the delimiters |
932 | * |
933 | * On entry, \a line should contain a whole line |
934 | * On exit, \a line will be updated to remove the groups and name |
935 | */ |
936 | QPair<QStringList,QString>QVersitReaderPrivate::extractPropertyGroupsAndName( |
937 | LByteArray* line, QTextCodec *codec) const |
938 | { |
939 | const QByteArray semicolon = VersitUtils::encode(ch: ';', codec); |
940 | const QByteArray colon = VersitUtils::encode(ch: ':', codec); |
941 | const QByteArray backslash = VersitUtils::encode(ch: '\\', codec); |
942 | QPair<QStringList,QString> groupsAndName; |
943 | int length = 0; |
944 | |
945 | int separatorLength = semicolon.length(); |
946 | for (int i = 0; i < line->size() - separatorLength + 1; i++) { |
947 | if ((containsAt(text: *line, match: semicolon, index: i) && !containsAt(text: *line, match: backslash, index: i-separatorLength)) |
948 | || containsAt(text: *line, match: colon, index: i)) { |
949 | length = i; |
950 | break; |
951 | } |
952 | } |
953 | if (length > 0) { |
954 | QString trimmedGroupsAndName = codec->toUnicode(line->left(n: length)).trimmed(); |
955 | QStringList parts = trimmedGroupsAndName.split(sep: QLatin1Char('.')); |
956 | if (parts.count() > 1) { |
957 | groupsAndName.second = parts.takeLast(); |
958 | groupsAndName.first = parts; |
959 | } else { |
960 | groupsAndName.second = trimmedGroupsAndName; |
961 | } |
962 | line->chopLeft(n: length); |
963 | } |
964 | |
965 | return groupsAndName; |
966 | } |
967 | |
968 | /*! |
969 | * Extracts the property parameters as a QMultiHash using \a codec to determine the delimiters. |
970 | * The parameters without names are added as "TYPE" parameters. |
971 | * |
972 | * On entry \a line should contain the line sans the group and name |
973 | * On exit, line will be updated to have the parameters removed. |
974 | */ |
975 | QMultiHash<QString,QString> QVersitReaderPrivate::( |
976 | LByteArray* line, QTextCodec *codec) const |
977 | { |
978 | QMultiHash<QString,QString> result; |
979 | QList<QByteArray> paramList = extractParams(line, codec); |
980 | while (!paramList.isEmpty()) { |
981 | QByteArray param = paramList.takeLast(); |
982 | QString name = paramName(parameter: param, codec); |
983 | QString value = paramValue(parameter: param, codec); |
984 | result.insert(akey: name,avalue: value); |
985 | } |
986 | |
987 | return result; |
988 | } |
989 | |
990 | /*! |
991 | * Extracts the property parameters as a QMultiHash using \a codec to determine the delimiters. |
992 | * The parameters without names are added as "TYPE" parameters. |
993 | * |
994 | * On entry \a line should contain the line sans the group and name |
995 | * On exit, line will be updated to have the parameters removed. |
996 | */ |
997 | QMultiHash<QString,QString> QVersitReaderPrivate::( |
998 | LByteArray* line, QTextCodec *codec) const |
999 | { |
1000 | QMultiHash<QString,QString> result; |
1001 | QList<QByteArray> paramList = extractParams(line, codec); |
1002 | while (!paramList.isEmpty()) { |
1003 | QByteArray param = paramList.takeLast(); |
1004 | QString name(paramName(parameter: param, codec)); |
1005 | removeBackSlashEscaping(text: &name); |
1006 | QString values = paramValue(parameter: param, codec); |
1007 | QStringList valueList = splitValue(string: values, sep: QLatin1Char(','), behaviour: QString::SkipEmptyParts, hasEscapedBackslashes: true); |
1008 | foreach (QString value, valueList) { |
1009 | removeBackSlashEscaping(text: &value); |
1010 | result.insert(akey: name, avalue: value); |
1011 | } |
1012 | } |
1013 | return result; |
1014 | } |
1015 | |
1016 | |
1017 | /*! |
1018 | * Extracts the parameters as delimited by semicolons using \a codec to determine the delimiters. |
1019 | * |
1020 | * On entry \a line should contain the content line sans the group and name |
1021 | * On exit, \a line will be updated to only have the value remain |
1022 | */ |
1023 | QList<QByteArray> QVersitReaderPrivate::(LByteArray* line, QTextCodec *codec) const |
1024 | { |
1025 | const QByteArray colon = VersitUtils::encode(ch: ':', codec); |
1026 | const QByteArray semicolon = VersitUtils::encode(ch: ';', codec); |
1027 | QList<QByteArray> params; |
1028 | |
1029 | /* find the end of the name¶ms */ |
1030 | int colonIndex = line->indexOf(needle: colon); |
1031 | if (colonIndex > 0) { |
1032 | QByteArray nameAndParamsString = line->left(n: colonIndex); |
1033 | params = extractParts(text: nameAndParamsString, separator: semicolon, codec); |
1034 | |
1035 | /* Update line */ |
1036 | line->chopLeft(n: colonIndex + colon.length()); |
1037 | } else if (colonIndex == 0) { |
1038 | // No parameters.. advance past it |
1039 | line->chopLeft(n: colon.length()); |
1040 | } |
1041 | |
1042 | return params; |
1043 | } |
1044 | |
1045 | /*! |
1046 | * Extracts the parts separated by separator discarding the separators escaped with a backslash |
1047 | * encoded with \a codec |
1048 | */ |
1049 | QList<QByteArray> QVersitReaderPrivate::( |
1050 | const QByteArray& text, const QByteArray& separator, QTextCodec* codec) const |
1051 | { |
1052 | QList<QByteArray> parts; |
1053 | int partStartIndex = 0; |
1054 | int textLength = text.length(); |
1055 | int separatorLength = separator.length(); |
1056 | const QByteArray backslash = VersitUtils::encode(ch: '\\', codec); |
1057 | int backslashLength = backslash.length(); |
1058 | |
1059 | for (int i=0; i < textLength-separatorLength+1; i++) { |
1060 | if (containsAt(text, match: separator, index: i) |
1061 | && (i < backslashLength |
1062 | || !containsAt(text, match: backslash, index: i-backslashLength))) { |
1063 | int length = i-partStartIndex; |
1064 | QByteArray part = extractPart(text,startPosition: partStartIndex,length); |
1065 | if (part.length() > 0) |
1066 | parts.append(t: part); |
1067 | partStartIndex = i+separatorLength; |
1068 | } |
1069 | } |
1070 | |
1071 | // Add the last or only part |
1072 | QByteArray part = extractPart(text,startPosition: partStartIndex); |
1073 | if (part.length() > 0) |
1074 | parts.append(t: part); |
1075 | return parts; |
1076 | } |
1077 | |
1078 | /*! |
1079 | * Extracts a substring limited by /a startPosition and /a length. |
1080 | */ |
1081 | QByteArray QVersitReaderPrivate::( |
1082 | const QByteArray& text, int startPosition, int length) const |
1083 | { |
1084 | QByteArray part; |
1085 | if (startPosition >= 0) |
1086 | part = text.mid(index: startPosition,len: length).trimmed(); |
1087 | return part; |
1088 | } |
1089 | |
1090 | /*! |
1091 | * Extracts the name of the parameter using \a codec to determine the delimiters. |
1092 | * No name is interpreted as an implicit "TYPE". |
1093 | */ |
1094 | QString QVersitReaderPrivate::paramName(const QByteArray& parameter, QTextCodec* codec) const |
1095 | { |
1096 | if (parameter.trimmed().length() == 0) |
1097 | return QString(); |
1098 | const QByteArray equals = VersitUtils::encode(ch: '=', codec); |
1099 | int equalsIndex = parameter.indexOf(a: equals); |
1100 | if (equalsIndex > 0) { |
1101 | return codec->toUnicode(parameter.left(len: equalsIndex)).trimmed(); |
1102 | } |
1103 | |
1104 | return QStringLiteral("TYPE" ); |
1105 | } |
1106 | |
1107 | /*! |
1108 | * Extracts the value of the parameter using \a codec to determine the delimiters |
1109 | */ |
1110 | QString QVersitReaderPrivate::paramValue(const QByteArray& parameter, QTextCodec* codec) const |
1111 | { |
1112 | QByteArray value(parameter); |
1113 | const QByteArray equals = VersitUtils::encode(ch: '=', codec); |
1114 | int equalsIndex = parameter.indexOf(a: equals); |
1115 | if (equalsIndex > 0) { |
1116 | int valueLength = parameter.length() - (equalsIndex + equals.length()); |
1117 | value = parameter.right(len: valueLength).trimmed(); |
1118 | } |
1119 | |
1120 | return codec->toUnicode(value); |
1121 | } |
1122 | |
1123 | /* |
1124 | * Returns true if and only if \a text contains \a ba at \a index |
1125 | * |
1126 | * On entry, index must be >= 0 |
1127 | * |
1128 | * T is either a QByteArray or LByteArray |
1129 | */ |
1130 | template <class T> bool QVersitReaderPrivate::containsAt(const T& text, const QByteArray& match, int index) |
1131 | { |
1132 | int n = match.length(); |
1133 | // This check is necessary because constData doesn't ensure it's null terminated at the right place |
1134 | if (text.size() - index < n) |
1135 | return false; |
1136 | const char* textData = text.constData(); |
1137 | const char* matchData = match.constData(); |
1138 | return memcmp(s1: textData+index, s2: matchData, n: n) == 0; |
1139 | } |
1140 | |
1141 | /*! |
1142 | * If the \a type and the \a property's name is known to contain a structured value, \a property's |
1143 | * value is split according to the type of structuring (compound vs. list) it is known to have. |
1144 | * Returns true if and only if such a split happened (ie. the property value holds a QStringList on |
1145 | * exit). |
1146 | */ |
1147 | bool QVersitReaderPrivate::splitStructuredValue( |
1148 | QVersitProperty* property, |
1149 | bool hasEscapedBackslashes) const |
1150 | { |
1151 | QVariant variant = property->variantValue(); |
1152 | if (property->valueType() == QVersitProperty::CompoundType) { |
1153 | variant.setValue(splitValue(string: variant.toString(), sep: QLatin1Char(';'), |
1154 | behaviour: QString::KeepEmptyParts, hasEscapedBackslashes)); |
1155 | property->setValue(variant); |
1156 | return true; |
1157 | } else if (property->valueType() == QVersitProperty::ListType) { |
1158 | variant.setValue(splitValue(string: variant.toString(), sep: QLatin1Char(','), |
1159 | behaviour: QString::SkipEmptyParts, hasEscapedBackslashes)); |
1160 | property->setValue(variant); |
1161 | return true; |
1162 | } |
1163 | return false; |
1164 | } |
1165 | |
1166 | /*! |
1167 | * Splits the \a string into substrings wherever \a sep occurs. |
1168 | * If \a hasEscapedBackslashes is false, then a \a sep preceded by a backslash is not considered |
1169 | * a split point (but the backslash is removed). |
1170 | * If \a hasEscapedBackslashes is true, then a \a sep preceded by an odd number of backslashes is |
1171 | * not considered a split point (but one backslash is removed). |
1172 | */ |
1173 | QStringList QVersitReaderPrivate::splitValue(const QString& string, |
1174 | const QChar& sep, |
1175 | QString::SplitBehavior behaviour, |
1176 | bool hasEscapedBackslashes) |
1177 | { |
1178 | QStringList list; |
1179 | bool isEscaped = false; // is the current character escaped |
1180 | int segmentStartIndex = 0; |
1181 | QString segment; |
1182 | for (int i = 0; i < string.length(); i++) { |
1183 | if (string.at(i) == QLatin1Char('\\')) { |
1184 | if (hasEscapedBackslashes) |
1185 | isEscaped = !isEscaped; // two consecutive backslashes make isEscaped false |
1186 | else |
1187 | isEscaped = true; |
1188 | } else if (string.at(i) == sep) { |
1189 | if (isEscaped) { |
1190 | // we see an escaped separator - remove the backslash |
1191 | segment += string.midRef(position: segmentStartIndex, n: i-segmentStartIndex-1); |
1192 | segment += sep; |
1193 | } else { |
1194 | // we see a separator |
1195 | segment += string.midRef(position: segmentStartIndex, n: i - segmentStartIndex); |
1196 | if (behaviour == QString::KeepEmptyParts || !segment.isEmpty()) |
1197 | list.append(t: segment); |
1198 | segment.clear(); |
1199 | } |
1200 | segmentStartIndex = i+1; |
1201 | isEscaped = false; |
1202 | } else { // normal character - keep going |
1203 | isEscaped = false; |
1204 | } |
1205 | } |
1206 | // The rest of the string after the last sep. |
1207 | segment += string.midRef(position: segmentStartIndex); |
1208 | if (behaviour == QString::KeepEmptyParts || !segment.isEmpty()) |
1209 | list.append(t: segment); |
1210 | return list; |
1211 | } |
1212 | |
1213 | /*! |
1214 | * Removes backslash escaping for line breaks (CRLFs), colons, semicolons, backslashes and commas |
1215 | * according to RFC 2426. This is called on parameter names and values and property values. |
1216 | * Colons ARE unescaped because the text of RFC2426 suggests that they should be. |
1217 | */ |
1218 | void QVersitReaderPrivate::removeBackSlashEscaping(QString* text) |
1219 | { |
1220 | if (!(text->startsWith(c: QLatin1Char('"')) && text->endsWith(c: QLatin1Char('"')))) { |
1221 | /* replaces \; with ; |
1222 | \, with , |
1223 | \: with : |
1224 | \\ with \ |
1225 | */ |
1226 | text->replace(rx: QRegExp(QStringLiteral("\\\\([;,:\\\\])" )), QStringLiteral("\\1" )); |
1227 | // replaces \n with a CRLF |
1228 | text->replace(QStringLiteral("\\n" ), QStringLiteral("\r\n" ), cs: Qt::CaseInsensitive); |
1229 | } |
1230 | } |
1231 | |
1232 | #include "moc_qversitreader_p.cpp" |
1233 | QT_END_NAMESPACE_VERSIT |
1234 | |