1 | /* |
2 | This file is part of the KContacts framework. |
3 | SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | #include "kcontacts_debug.h" |
9 | #include "vcardparser_p.h" |
10 | #include <KCodecs> |
11 | #include <QStringDecoder> |
12 | #include <QStringEncoder> |
13 | #include <functional> |
14 | |
15 | // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings |
16 | class StringCache |
17 | { |
18 | public: |
19 | QString fromLatin1(const QByteArray &value) |
20 | { |
21 | if (value.isEmpty()) { |
22 | return QString(); |
23 | } |
24 | |
25 | auto it = m_values.constFind(key: value); |
26 | if (it != m_values.constEnd()) { |
27 | return it.value(); |
28 | } |
29 | |
30 | QString string = QString::fromLatin1(ba: value); |
31 | m_values.insert(key: value, value: string); |
32 | return string; |
33 | } |
34 | |
35 | private: |
36 | QHash<QByteArray, QString> m_values; |
37 | }; |
38 | |
39 | using namespace KContacts; |
40 | |
41 | static void addEscapes(QByteArray &str, bool excludeEscapedComma) |
42 | { |
43 | str.replace(before: '\\', after: "\\\\" ); |
44 | if (!excludeEscapedComma) { |
45 | str.replace(before: ',', after: "\\," ); |
46 | } |
47 | str.replace(before: '\r', after: "\\r" ); |
48 | str.replace(before: '\n', after: "\\n" ); |
49 | } |
50 | |
51 | static void removeEscapes(QByteArray &str) |
52 | { |
53 | // It's more likely that no escape is present, so add fast path |
54 | if (!str.contains(c: '\\')) { |
55 | return; |
56 | } |
57 | str.replace(before: "\\n" , after: "\n" ); |
58 | str.replace(before: "\\N" , after: "\n" ); |
59 | str.replace(before: "\\r" , after: "\r" ); |
60 | str.replace(before: "\\," , after: "," ); |
61 | str.replace(before: "\\\\" , after: "\\" ); |
62 | } |
63 | |
64 | class VCardLineParser |
65 | { |
66 | public: |
67 | VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine) |
68 | : m_cache(cache) |
69 | , m_fetchAnotherLine(fetchAnotherLine) |
70 | { |
71 | } |
72 | |
73 | void parseLine(const QByteArray ¤tLine, VCardLine *vCardLine); |
74 | |
75 | private: |
76 | void addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue); |
77 | |
78 | private: |
79 | StringCache &m_cache; |
80 | std::function<QByteArray()> m_fetchAnotherLine; |
81 | |
82 | VCardLine *m_vCardLine = nullptr; |
83 | QByteArray m_encoding; |
84 | QByteArray m_charset; |
85 | }; |
86 | |
87 | void VCardLineParser::addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue) |
88 | { |
89 | if (paramKey == "encoding" ) { |
90 | m_encoding = paramValue.toLower(); |
91 | } else if (paramKey == "charset" ) { |
92 | m_charset = paramValue.toLower(); |
93 | } |
94 | // qDebug() << " add parameter" << paramKey << " = " << paramValue; |
95 | m_vCardLine->addParameter(param: m_cache.fromLatin1(value: paramKey), value: m_cache.fromLatin1(value: paramValue)); |
96 | } |
97 | |
98 | void VCardLineParser::parseLine(const QByteArray ¤tLine, KContacts::VCardLine *vCardLine) |
99 | { |
100 | // qDebug() << currentLine; |
101 | m_vCardLine = vCardLine; |
102 | // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong. |
103 | // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234; |
104 | // Therefore we need a small state machine, just the way I like it. |
105 | enum State { |
106 | StateInitial, |
107 | StateParamKey, |
108 | StateParamValue, |
109 | StateQuotedValue, |
110 | StateAfterParamValue, |
111 | StateValue, |
112 | }; |
113 | State state = StateInitial; |
114 | const int lineLength = currentLine.length(); |
115 | const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode |
116 | QByteArray paramKey; |
117 | QByteArray paramValue; |
118 | int start = 0; |
119 | int pos = 0; |
120 | for (; pos < lineLength; ++pos) { |
121 | const char ch = lineData[pos]; |
122 | const bool colonOrSemicolon = (ch == ';' || ch == ':'); |
123 | switch (state) { |
124 | case StateInitial: |
125 | if (colonOrSemicolon) { |
126 | const QByteArray identifier = currentLine.mid(index: start, len: pos - start); |
127 | // qDebug() << " identifier" << identifier; |
128 | vCardLine->setIdentifier(m_cache.fromLatin1(value: identifier)); |
129 | start = pos + 1; |
130 | } |
131 | if (ch == ';') { |
132 | state = StateParamKey; |
133 | } else if (ch == ':') { |
134 | state = StateValue; |
135 | } else if (ch == '.') { |
136 | vCardLine->setGroup(m_cache.fromLatin1(value: currentLine.mid(index: start, len: pos - start))); |
137 | start = pos + 1; |
138 | } |
139 | break; |
140 | case StateParamKey: |
141 | if (colonOrSemicolon || ch == '=') { |
142 | paramKey = currentLine.mid(index: start, len: pos - start); |
143 | start = pos + 1; |
144 | } |
145 | if (colonOrSemicolon) { |
146 | // correct the so-called 2.1 'standard' |
147 | paramValue = paramKey; |
148 | const QByteArray lowerKey = paramKey.toLower(); |
149 | if (lowerKey == "quoted-printable" || lowerKey == "base64" ) { |
150 | paramKey = "encoding" ; |
151 | } else { |
152 | paramKey = "type" ; |
153 | } |
154 | addParameter(paramKey, paramValue); |
155 | } |
156 | if (ch == ';') { |
157 | state = StateParamKey; |
158 | } else if (ch == ':') { |
159 | state = StateValue; |
160 | } else if (ch == '=') { |
161 | state = StateParamValue; |
162 | } |
163 | break; |
164 | case StateQuotedValue: |
165 | if (ch == '"' || (ch == ',' && paramKey.toLower() == "type" )) { |
166 | // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec |
167 | paramValue = currentLine.mid(index: start, len: pos - start); |
168 | addParameter(paramKey: paramKey.toLower(), paramValue); |
169 | start = pos + 1; |
170 | if (ch == '"') { |
171 | state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char |
172 | } |
173 | } |
174 | break; |
175 | case StateParamValue: |
176 | if (colonOrSemicolon || ch == ',') { |
177 | paramValue = currentLine.mid(index: start, len: pos - start); |
178 | addParameter(paramKey: paramKey.toLower(), paramValue); |
179 | start = pos + 1; |
180 | } |
181 | // fall-through intended |
182 | Q_FALLTHROUGH(); |
183 | case StateAfterParamValue: |
184 | if (ch == ';') { |
185 | state = StateParamKey; |
186 | start = pos + 1; |
187 | } else if (ch == ':') { |
188 | state = StateValue; |
189 | } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec. |
190 | state = StateQuotedValue; |
191 | start = pos + 1; |
192 | } |
193 | break; |
194 | case StateValue: |
195 | Q_UNREACHABLE(); |
196 | break; |
197 | } |
198 | |
199 | if (state == StateValue) { |
200 | break; |
201 | } |
202 | } |
203 | |
204 | if (state != StateValue) { // invalid line, no ':' |
205 | return; |
206 | } |
207 | |
208 | QByteArray value = currentLine.mid(index: pos + 1); |
209 | removeEscapes(str&: value); |
210 | |
211 | QByteArray output; |
212 | bool wasBase64Encoded = false; |
213 | |
214 | if (!m_encoding.isEmpty()) { |
215 | // have to decode the data |
216 | if (m_encoding == "b" || m_encoding == "base64" ) { |
217 | output = QByteArray::fromBase64(base64: value); |
218 | wasBase64Encoded = true; |
219 | } else if (m_encoding == "quoted-printable" ) { |
220 | // join any qp-folded lines |
221 | while (value.endsWith(c: '=')) { |
222 | value.chop(n: 1); // remove the '=' |
223 | value.append(a: m_fetchAnotherLine()); |
224 | } |
225 | KCodecs::quotedPrintableDecode(in: value, out&: output); |
226 | } else if (m_encoding == "8bit" ) { |
227 | output = value; |
228 | } else { |
229 | qDebug(msg: "Unknown vcard encoding type!" ); |
230 | } |
231 | } else { |
232 | output = value; |
233 | } |
234 | |
235 | if (!m_charset.isEmpty()) { |
236 | // have to convert the data |
237 | auto codec = QStringDecoder(m_charset.constData()); |
238 | if (codec.isValid()) { |
239 | vCardLine->setValue(QVariant::fromValue<QString>(value: codec.decode(ba: output))); |
240 | } else { |
241 | vCardLine->setValue(QString::fromUtf8(ba: output)); |
242 | } |
243 | } else if (wasBase64Encoded) { |
244 | vCardLine->setValue(output); |
245 | } else { |
246 | vCardLine->setValue(QString::fromUtf8(ba: output)); |
247 | } |
248 | } |
249 | |
250 | //// |
251 | |
252 | VCardParser::VCardParser() |
253 | { |
254 | } |
255 | |
256 | VCardParser::~VCardParser() |
257 | { |
258 | } |
259 | |
260 | VCard::List VCardParser::parseVCards(const QByteArray &text) |
261 | { |
262 | VCard currentVCard; |
263 | VCard::List vCardList; |
264 | QByteArray currentLine; |
265 | |
266 | int lineStart = 0; |
267 | int lineEnd = text.indexOf(c: '\n'); |
268 | |
269 | bool inVCard = false; |
270 | |
271 | StringCache cache; |
272 | for (; lineStart != text.size() + 1; |
273 | lineStart = lineEnd + 1, lineEnd = (text.indexOf(c: '\n', from: lineStart) == -1) ? text.size() : text.indexOf(c: '\n', from: lineStart)) { |
274 | QByteArray cur = text.mid(index: lineStart, len: lineEnd - lineStart); |
275 | // remove the trailing \r, left from \r\n |
276 | if (cur.endsWith(c: '\r')) { |
277 | cur.chop(n: 1); |
278 | } |
279 | |
280 | if (cur.startsWith(c: ' ') // |
281 | || cur.startsWith(c: '\t')) { // folded line => append to previous |
282 | currentLine.append(a: cur.mid(index: 1)); |
283 | continue; |
284 | } else { |
285 | if (cur.trimmed().isEmpty()) { // empty line |
286 | continue; |
287 | } |
288 | if (inVCard && !currentLine.isEmpty()) { // now parse the line |
289 | VCardLine vCardLine; |
290 | |
291 | // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support) |
292 | auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray { |
293 | const QByteArray ret = cur; |
294 | lineStart = lineEnd + 1; |
295 | lineEnd = text.indexOf(c: '\n', from: lineStart); |
296 | if (lineEnd != -1) { |
297 | cur = text.mid(index: lineStart, len: lineEnd - lineStart); |
298 | // remove the trailing \r, left from \r\n |
299 | if (cur.endsWith(c: '\r')) { |
300 | cur.chop(n: 1); |
301 | } |
302 | } |
303 | return ret; |
304 | }; |
305 | |
306 | VCardLineParser lineParser(cache, fetchAnotherLine); |
307 | |
308 | lineParser.parseLine(currentLine, vCardLine: &vCardLine); |
309 | |
310 | currentVCard.addLine(line: vCardLine); |
311 | } |
312 | |
313 | // we do not save the start and end tag as vcardline |
314 | if (qstrnicmp(cur.constData(), "begin:vcard" , len: 11) == 0) { |
315 | inVCard = true; |
316 | currentLine.clear(); |
317 | currentVCard.clear(); // flush vcard |
318 | continue; |
319 | } |
320 | |
321 | if (qstrnicmp(cur.constData(), "end:vcard" , len: 9) == 0) { |
322 | inVCard = false; |
323 | vCardList.append(t: currentVCard); |
324 | currentLine.clear(); |
325 | currentVCard.clear(); // flush vcard |
326 | continue; |
327 | } |
328 | |
329 | currentLine = cur; |
330 | } |
331 | } |
332 | |
333 | return vCardList; |
334 | } |
335 | |
336 | static const int FOLD_WIDTH = 75; |
337 | |
338 | QByteArray VCardParser::createVCards(const VCard::List &list) |
339 | { |
340 | QByteArray text; |
341 | QByteArray textLine; |
342 | QString encodingType; |
343 | QStringList params; |
344 | QStringList values; |
345 | |
346 | VCardLine::List lines; |
347 | |
348 | bool hasEncoding; |
349 | |
350 | text.reserve(asize: list.size() * 300); // reserve memory to be more efficient |
351 | |
352 | // iterate over the cards |
353 | for (const VCard &card : list) { |
354 | text.append(s: "BEGIN:VCARD\r\n" ); |
355 | |
356 | QStringList idents = card.identifiers(); |
357 | // VERSION must be first |
358 | if (idents.contains(str: QLatin1String("VERSION" ))) { |
359 | const QString str = idents.takeAt(i: idents.indexOf(t: QLatin1String("VERSION" ))); |
360 | idents.prepend(t: str); |
361 | } |
362 | |
363 | for (const auto &id : std::as_const(t&: idents)) { |
364 | lines = card.lines(identifier: id); |
365 | |
366 | // iterate over the lines |
367 | for (const VCardLine &vline : std::as_const(t&: lines)) { |
368 | QVariant val = vline.value(); |
369 | if (val.isValid()) { |
370 | if (vline.hasGroup()) { |
371 | textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1(); |
372 | } else { |
373 | textLine = vline.identifier().toLatin1(); |
374 | } |
375 | |
376 | params = vline.parameterList(); |
377 | hasEncoding = false; |
378 | if (!params.isEmpty()) { // we have parameters |
379 | for (const QString ¶m : std::as_const(t&: params)) { |
380 | if (param == QLatin1String("encoding" )) { |
381 | hasEncoding = true; |
382 | encodingType = vline.parameter(QStringLiteral("encoding" )).toLower(); |
383 | } |
384 | |
385 | values = vline.parameters(param); |
386 | for (const QString &str : std::as_const(t&: values)) { |
387 | textLine.append(a: ';' + param.toLatin1().toUpper()); |
388 | if (!str.isEmpty()) { |
389 | textLine.append(a: '=' + str.toLatin1()); |
390 | } |
391 | } |
392 | } |
393 | } |
394 | |
395 | QByteArray input; |
396 | QByteArray output; |
397 | bool checkMultibyte = false; // avoid splitting a multibyte character |
398 | |
399 | // handle charset |
400 | const QString charset = vline.parameter(QStringLiteral("charset" )); |
401 | if (!charset.isEmpty()) { |
402 | // have to convert the data |
403 | const QString value = vline.value().toString(); |
404 | auto codec = QStringEncoder(charset.toLatin1().constData()); |
405 | if (codec.isValid()) { |
406 | input = codec.encode(str: value); |
407 | } else { |
408 | checkMultibyte = true; |
409 | input = value.toUtf8(); |
410 | } |
411 | } else if (vline.value().userType() == QMetaType::QByteArray) { |
412 | input = vline.value().toByteArray(); |
413 | } else { |
414 | checkMultibyte = true; |
415 | input = vline.value().toString().toUtf8(); |
416 | } |
417 | |
418 | // handle encoding |
419 | if (hasEncoding) { // have to encode the data |
420 | if (encodingType == QLatin1Char('b')) { |
421 | checkMultibyte = false; |
422 | output = input.toBase64(); |
423 | } else if (encodingType == QLatin1String("quoted-printable" )) { |
424 | checkMultibyte = false; |
425 | KCodecs::quotedPrintableEncode(in: input, out&: output, useCRLF: false); |
426 | } |
427 | } else { |
428 | output = input; |
429 | } |
430 | addEscapes(str&: output, excludeEscapedComma: (vline.identifier() == QLatin1String("CATEGORIES" ) || vline.identifier() == QLatin1String("GEO" ))); |
431 | |
432 | if (!output.isEmpty()) { |
433 | textLine.append(a: ':' + output); |
434 | |
435 | if (textLine.length() > FOLD_WIDTH) { // we have to fold the line |
436 | if (checkMultibyte) { |
437 | // RFC 6350: Multi-octet characters MUST remain contiguous. |
438 | // we know that textLine contains UTF-8 encoded characters |
439 | int lineLength = 0; |
440 | for (int i = 0; i < textLine.length(); ++i) { |
441 | if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows |
442 | int sequenceLength = 2; |
443 | if ((textLine[i] & 0xE0) == 0xE0) { |
444 | sequenceLength = 3; |
445 | } else if ((textLine[i] & 0xF0) == 0xF0) { |
446 | sequenceLength = 4; |
447 | } |
448 | if ((lineLength + sequenceLength) > FOLD_WIDTH) { |
449 | // the current line would be too long. fold it |
450 | text += "\r\n " + textLine.mid(index: i, len: sequenceLength); |
451 | lineLength = 1 + sequenceLength; // incl. leading space |
452 | } else { |
453 | text += textLine.mid(index: i, len: sequenceLength); |
454 | lineLength += sequenceLength; |
455 | } |
456 | i += sequenceLength - 1; |
457 | } else { |
458 | text += textLine[i]; |
459 | ++lineLength; |
460 | } |
461 | if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) { |
462 | text += "\r\n " ; |
463 | lineLength = 1; // leading space |
464 | } |
465 | } |
466 | text += "\r\n" ; |
467 | } else { |
468 | for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) { |
469 | text.append(a: (i == 0 ? "" : " " ) + textLine.mid(index: i * FOLD_WIDTH, len: FOLD_WIDTH) + "\r\n" ); |
470 | } |
471 | } |
472 | } else { |
473 | text.append(a: textLine); |
474 | text.append(s: "\r\n" ); |
475 | } |
476 | } |
477 | } |
478 | } |
479 | } |
480 | |
481 | text.append(s: "END:VCARD\r\n" ); |
482 | text.append(s: "\r\n" ); |
483 | } |
484 | |
485 | return text; |
486 | } |
487 | |