1/*
2 This file is part of the KContacts framework.
3 SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "vcardparser_p.h"
9#include <KCodecs>
10#include <QStringDecoder>
11#include <QStringEncoder>
12#include <functional>
13
14// This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
15class StringCache
16{
17public:
18 QString fromLatin1(const QByteArray &value)
19 {
20 if (value.isEmpty()) {
21 return QString();
22 }
23
24 auto it = m_values.constFind(key: value);
25 if (it != m_values.constEnd()) {
26 return it.value();
27 }
28
29 QString string = QString::fromLatin1(ba: value);
30 m_values.insert(key: value, value: string);
31 return string;
32 }
33
34private:
35 QHash<QByteArray, QString> m_values;
36};
37
38using namespace KContacts;
39
40static void addEscapes(QByteArray &str, bool excludeEscapedComma)
41{
42 str.replace(before: '\\', after: "\\\\");
43 if (!excludeEscapedComma) {
44 str.replace(before: ',', after: "\\,");
45 }
46 str.replace(before: '\r', after: "\\r");
47 str.replace(before: '\n', after: "\\n");
48}
49
50static void removeEscapes(QByteArray &str)
51{
52 // It's more likely that no escape is present, so add fast path
53 if (!str.contains(c: '\\')) {
54 return;
55 }
56 str.replace(before: "\\n", after: "\n");
57 str.replace(before: "\\N", after: "\n");
58 str.replace(before: "\\r", after: "\r");
59 str.replace(before: "\\,", after: ",");
60 str.replace(before: "\\\\", after: "\\");
61}
62
63class VCardLineParser
64{
65public:
66 VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
67 : m_cache(cache)
68 , m_fetchAnotherLine(fetchAnotherLine)
69 {
70 }
71
72 void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
73
74private:
75 void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
76
77private:
78 StringCache &m_cache;
79 std::function<QByteArray()> m_fetchAnotherLine;
80
81 VCardLine *m_vCardLine = nullptr;
82 QByteArray m_encoding;
83 QByteArray m_charset;
84};
85
86void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
87{
88 if (paramKey == "encoding") {
89 m_encoding = paramValue.toLower();
90 } else if (paramKey == "charset") {
91 m_charset = paramValue.toLower();
92 }
93 // qDebug() << " add parameter" << paramKey << " = " << paramValue;
94 m_vCardLine->addParameter(param: m_cache.fromLatin1(value: paramKey), value: m_cache.fromLatin1(value: paramValue));
95}
96
97void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
98{
99 // qDebug() << currentLine;
100 m_vCardLine = vCardLine;
101 // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
102 // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
103 // Therefore we need a small state machine, just the way I like it.
104 enum State {
105 StateInitial,
106 StateParamKey,
107 StateParamValue,
108 StateQuotedValue,
109 StateAfterParamValue,
110 StateValue,
111 };
112 State state = StateInitial;
113 const int lineLength = currentLine.length();
114 const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
115 QByteArray paramKey;
116 QByteArray paramValue;
117 int start = 0;
118 int pos = 0;
119 for (; pos < lineLength; ++pos) {
120 const char ch = lineData[pos];
121 const bool colonOrSemicolon = (ch == ';' || ch == ':');
122 switch (state) {
123 case StateInitial:
124 if (colonOrSemicolon) {
125 const QByteArray identifier = currentLine.mid(index: start, len: pos - start);
126 // qDebug() << " identifier" << identifier;
127 vCardLine->setIdentifier(m_cache.fromLatin1(value: identifier));
128 start = pos + 1;
129 }
130 if (ch == ';') {
131 state = StateParamKey;
132 } else if (ch == ':') {
133 state = StateValue;
134 } else if (ch == '.') {
135 vCardLine->setGroup(m_cache.fromLatin1(value: currentLine.mid(index: start, len: pos - start)));
136 start = pos + 1;
137 }
138 break;
139 case StateParamKey:
140 if (colonOrSemicolon || ch == '=') {
141 paramKey = currentLine.mid(index: start, len: pos - start);
142 start = pos + 1;
143 }
144 if (colonOrSemicolon) {
145 // correct the so-called 2.1 'standard'
146 paramValue = paramKey;
147 const QByteArray lowerKey = paramKey.toLower();
148 if (lowerKey == "quoted-printable" || lowerKey == "base64") {
149 paramKey = "encoding";
150 } else {
151 paramKey = "type";
152 }
153 addParameter(paramKey, paramValue);
154 }
155 if (ch == ';') {
156 state = StateParamKey;
157 } else if (ch == ':') {
158 state = StateValue;
159 } else if (ch == '=') {
160 state = StateParamValue;
161 }
162 break;
163 case StateQuotedValue:
164 if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
165 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
166 paramValue = currentLine.mid(index: start, len: pos - start);
167 addParameter(paramKey: paramKey.toLower(), paramValue);
168 start = pos + 1;
169 if (ch == '"') {
170 state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
171 }
172 }
173 break;
174 case StateParamValue:
175 if (colonOrSemicolon || ch == ',') {
176 paramValue = currentLine.mid(index: start, len: pos - start);
177 addParameter(paramKey: paramKey.toLower(), paramValue);
178 start = pos + 1;
179 }
180 // fall-through intended
181 Q_FALLTHROUGH();
182 case StateAfterParamValue:
183 if (ch == ';') {
184 state = StateParamKey;
185 start = pos + 1;
186 } else if (ch == ':') {
187 state = StateValue;
188 } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
189 state = StateQuotedValue;
190 start = pos + 1;
191 }
192 break;
193 case StateValue:
194 Q_UNREACHABLE();
195 break;
196 }
197
198 if (state == StateValue) {
199 break;
200 }
201 }
202
203 if (state != StateValue) { // invalid line, no ':'
204 return;
205 }
206
207 QByteArray value = currentLine.mid(index: pos + 1);
208 removeEscapes(str&: value);
209
210 QByteArray output;
211 bool wasBase64Encoded = false;
212
213 if (!m_encoding.isEmpty()) {
214 // have to decode the data
215 if (m_encoding == "b" || m_encoding == "base64") {
216 output = QByteArray::fromBase64(base64: value);
217 wasBase64Encoded = true;
218 } else if (m_encoding == "quoted-printable") {
219 // join any qp-folded lines
220 while (value.endsWith(c: '=')) {
221 value.chop(n: 1); // remove the '='
222 value.append(a: m_fetchAnotherLine());
223 }
224 KCodecs::quotedPrintableDecode(in: value, out&: output);
225 } else if (m_encoding == "8bit") {
226 output = value;
227 } else {
228 qDebug(msg: "Unknown vcard encoding type!");
229 }
230 } else {
231 output = value;
232 }
233
234 if (!m_charset.isEmpty()) {
235 // have to convert the data
236 auto codec = QStringDecoder(m_charset.constData());
237 if (codec.isValid()) {
238 vCardLine->setValue(QVariant::fromValue<QString>(value: codec.decode(ba: output)));
239 } else {
240 vCardLine->setValue(QString::fromUtf8(ba: output));
241 }
242 } else if (wasBase64Encoded) {
243 vCardLine->setValue(output);
244 } else {
245 vCardLine->setValue(QString::fromUtf8(ba: output));
246 }
247}
248
249////
250
251VCardParser::VCardParser()
252{
253}
254
255VCardParser::~VCardParser()
256{
257}
258
259VCard::List VCardParser::parseVCards(const QByteArray &text)
260{
261 VCard currentVCard;
262 VCard::List vCardList;
263 QByteArray currentLine;
264
265 int lineStart = 0;
266 int lineEnd = text.indexOf(ch: '\n');
267
268 bool inVCard = false;
269
270 StringCache cache;
271 for (; lineStart != text.size() + 1;
272 lineStart = lineEnd + 1, lineEnd = (text.indexOf(ch: '\n', from: lineStart) == -1) ? text.size() : text.indexOf(ch: '\n', from: lineStart)) {
273 QByteArray cur = text.mid(index: lineStart, len: lineEnd - lineStart);
274 // remove the trailing \r, left from \r\n
275 if (cur.endsWith(c: '\r')) {
276 cur.chop(n: 1);
277 }
278
279 if (cur.startsWith(c: ' ') //
280 || cur.startsWith(c: '\t')) { // folded line => append to previous
281 currentLine.append(a: cur.mid(index: 1));
282 continue;
283 } else {
284 if (cur.trimmed().isEmpty()) { // empty line
285 continue;
286 }
287 if (inVCard && !currentLine.isEmpty()) { // now parse the line
288 VCardLine vCardLine;
289
290 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
291 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
292 const QByteArray ret = cur;
293 lineStart = lineEnd + 1;
294 lineEnd = text.indexOf(ch: '\n', from: lineStart);
295 if (lineEnd != -1) {
296 cur = text.mid(index: lineStart, len: lineEnd - lineStart);
297 // remove the trailing \r, left from \r\n
298 if (cur.endsWith(c: '\r')) {
299 cur.chop(n: 1);
300 }
301 }
302 return ret;
303 };
304
305 VCardLineParser lineParser(cache, fetchAnotherLine);
306
307 lineParser.parseLine(currentLine, vCardLine: &vCardLine);
308
309 currentVCard.addLine(line: vCardLine);
310 }
311
312 // we do not save the start and end tag as vcardline
313 if (qstrnicmp(cur.constData(), "begin:vcard", len: 11) == 0) {
314 inVCard = true;
315 currentLine.clear();
316 currentVCard.clear(); // flush vcard
317 continue;
318 }
319
320 if (qstrnicmp(cur.constData(), "end:vcard", len: 9) == 0) {
321 inVCard = false;
322 vCardList.append(t: currentVCard);
323 currentLine.clear();
324 currentVCard.clear(); // flush vcard
325 continue;
326 }
327
328 currentLine = cur;
329 }
330 }
331
332 return vCardList;
333}
334
335static const int FOLD_WIDTH = 75;
336
337namespace
338{
339static QByteArray fixLineSize(const QByteArray &textLine, bool checkMultibyte)
340{
341 QByteArray text;
342 if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
343 if (checkMultibyte) {
344 // RFC 6350: Multi-octet characters MUST remain contiguous.
345 // we know that textLine contains UTF-8 encoded characters
346 int lineLength = 0;
347 for (int i = 0; i < textLine.length(); ++i) {
348 if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
349 int sequenceLength = 2;
350 if ((textLine[i] & 0xE0) == 0xE0) {
351 sequenceLength = 3;
352 } else if ((textLine[i] & 0xF0) == 0xF0) {
353 sequenceLength = 4;
354 }
355 if ((lineLength + sequenceLength) > FOLD_WIDTH) {
356 // the current line would be too long. fold it
357 text += "\r\n " + textLine.mid(index: i, len: sequenceLength);
358 lineLength = 1 + sequenceLength; // incl. leading space
359 } else {
360 text += textLine.mid(index: i, len: sequenceLength);
361 lineLength += sequenceLength;
362 }
363 i += sequenceLength - 1;
364 } else {
365 text += textLine[i];
366 ++lineLength;
367 }
368 if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
369 text += "\r\n ";
370 lineLength = 1; // leading space
371 }
372 }
373 text += "\r\n";
374 } else {
375 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
376 text.append(a: (i == 0 ? "" : " ") + textLine.mid(index: i * FOLD_WIDTH, len: FOLD_WIDTH) + "\r\n");
377 }
378 }
379 } else {
380 text.append(a: textLine);
381 text.append(s: "\r\n");
382 }
383 return text;
384}
385}
386
387QByteArray VCardParser::createVCards(const VCard::List &list)
388{
389 QByteArray text;
390 QByteArray textLine;
391 QString encodingType;
392 QStringList params;
393 QStringList values;
394
395 VCardLine::List lines;
396
397 bool hasEncoding;
398
399 text.reserve(asize: list.size() * 300); // reserve memory to be more efficient
400
401 // iterate over the cards
402 for (const VCard &card : list) {
403 text.append(s: "BEGIN:VCARD\r\n");
404
405 QStringList idents = card.identifiers();
406 // VERSION must be first
407 if (idents.contains(str: QLatin1String("VERSION"))) {
408 const QString str = idents.takeAt(i: idents.indexOf(needle: QLatin1String("VERSION")));
409 idents.prepend(t: str);
410 }
411
412 for (const auto &id : std::as_const(t&: idents)) {
413 lines = card.lines(identifier: id);
414
415 // iterate over the lines
416 for (const VCardLine &vline : std::as_const(t&: lines)) {
417 const QVariant val = vline.value();
418 if (val.isValid()) {
419 if (vline.hasGroup()) {
420 textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1();
421 } else {
422 textLine = vline.identifier().toLatin1();
423 }
424
425 params = vline.parameterList();
426 hasEncoding = false;
427 if (!params.isEmpty()) { // we have parameters
428 for (const QString &param : std::as_const(t&: params)) {
429 if (param == QLatin1String("encoding")) {
430 hasEncoding = true;
431 encodingType = vline.parameter(QStringLiteral("encoding")).toLower();
432 }
433
434 values = vline.parameters(param);
435 for (const QString &str : std::as_const(t&: values)) {
436 textLine.append(a: ';' + param.toLatin1().toUpper());
437 if (!str.isEmpty()) {
438 textLine.append(a: '=' + str.toLatin1());
439 }
440 }
441 }
442 }
443
444 QByteArray input;
445 QByteArray output;
446 bool checkMultibyte = false; // avoid splitting a multibyte character
447
448 // handle charset
449 const QString charset = vline.parameter(QStringLiteral("charset"));
450 if (!charset.isEmpty()) {
451 // have to convert the data
452 const QString value = vline.value().toString();
453 auto codec = QStringEncoder(charset.toLatin1().constData());
454 if (codec.isValid()) {
455 input = codec.encode(str: value);
456 } else {
457 checkMultibyte = true;
458 input = value.toUtf8();
459 }
460 } else if (vline.value().userType() == QMetaType::QByteArray) {
461 input = vline.value().toByteArray();
462 } else {
463 checkMultibyte = true;
464 input = vline.value().toString().toUtf8();
465 }
466
467 // handle encoding
468 if (hasEncoding) { // have to encode the data
469 if (encodingType == QLatin1Char('b')) {
470 checkMultibyte = false;
471 output = input.toBase64();
472 } else if (encodingType == QLatin1String("quoted-printable")) {
473 checkMultibyte = false;
474 KCodecs::quotedPrintableEncode(in: input, out&: output, useCRLF: false);
475 }
476 } else {
477 output = input;
478 }
479 addEscapes(str&: output, excludeEscapedComma: (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO")));
480
481 if (!output.isEmpty()) {
482 textLine.append(a: ':' + output);
483
484 text.append(a: fixLineSize(textLine, checkMultibyte));
485 }
486 } else if (!vline.base64Value().isEmpty()) {
487 text += vline.identifier().toLatin1();
488 text.append(a: fixLineSize(textLine: ";base64," + vline.base64Value(), checkMultibyte: false));
489 }
490 }
491 }
492
493 text.append(s: "END:VCARD\r\n");
494 text.append(s: "\r\n");
495 }
496
497 return text;
498}
499

// Source: kcontacts/src/vcardparser/vcardparser.cpp