| 1 | /* |
| 2 | This file is part of the KContacts framework. |
| 3 | SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 6 | */ |
| 7 | |
| 8 | #include "kcontacts_debug.h" |
| 9 | #include "vcardparser_p.h" |
| 10 | #include <KCodecs> |
| 11 | #include <QStringDecoder> |
| 12 | #include <QStringEncoder> |
| 13 | #include <functional> |
| 14 | |
| 15 | // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings |
| 16 | class StringCache |
| 17 | { |
| 18 | public: |
| 19 | QString fromLatin1(const QByteArray &value) |
| 20 | { |
| 21 | if (value.isEmpty()) { |
| 22 | return QString(); |
| 23 | } |
| 24 | |
| 25 | auto it = m_values.constFind(key: value); |
| 26 | if (it != m_values.constEnd()) { |
| 27 | return it.value(); |
| 28 | } |
| 29 | |
| 30 | QString string = QString::fromLatin1(ba: value); |
| 31 | m_values.insert(key: value, value: string); |
| 32 | return string; |
| 33 | } |
| 34 | |
| 35 | private: |
| 36 | QHash<QByteArray, QString> m_values; |
| 37 | }; |
| 38 | |
| 39 | using namespace KContacts; |
| 40 | |
| 41 | static void addEscapes(QByteArray &str, bool excludeEscapedComma) |
| 42 | { |
| 43 | str.replace(before: '\\', after: "\\\\" ); |
| 44 | if (!excludeEscapedComma) { |
| 45 | str.replace(before: ',', after: "\\," ); |
| 46 | } |
| 47 | str.replace(before: '\r', after: "\\r" ); |
| 48 | str.replace(before: '\n', after: "\\n" ); |
| 49 | } |
| 50 | |
| 51 | static void removeEscapes(QByteArray &str) |
| 52 | { |
| 53 | // It's more likely that no escape is present, so add fast path |
| 54 | if (!str.contains(c: '\\')) { |
| 55 | return; |
| 56 | } |
| 57 | str.replace(before: "\\n" , after: "\n" ); |
| 58 | str.replace(before: "\\N" , after: "\n" ); |
| 59 | str.replace(before: "\\r" , after: "\r" ); |
| 60 | str.replace(before: "\\," , after: "," ); |
| 61 | str.replace(before: "\\\\" , after: "\\" ); |
| 62 | } |
| 63 | |
| 64 | class VCardLineParser |
| 65 | { |
| 66 | public: |
| 67 | VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine) |
| 68 | : m_cache(cache) |
| 69 | , m_fetchAnotherLine(fetchAnotherLine) |
| 70 | { |
| 71 | } |
| 72 | |
| 73 | void parseLine(const QByteArray ¤tLine, VCardLine *vCardLine); |
| 74 | |
| 75 | private: |
| 76 | void addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue); |
| 77 | |
| 78 | private: |
| 79 | StringCache &m_cache; |
| 80 | std::function<QByteArray()> m_fetchAnotherLine; |
| 81 | |
| 82 | VCardLine *m_vCardLine = nullptr; |
| 83 | QByteArray m_encoding; |
| 84 | QByteArray m_charset; |
| 85 | }; |
| 86 | |
| 87 | void VCardLineParser::addParameter(const QByteArray ¶mKey, const QByteArray ¶mValue) |
| 88 | { |
| 89 | if (paramKey == "encoding" ) { |
| 90 | m_encoding = paramValue.toLower(); |
| 91 | } else if (paramKey == "charset" ) { |
| 92 | m_charset = paramValue.toLower(); |
| 93 | } |
| 94 | // qDebug() << " add parameter" << paramKey << " = " << paramValue; |
| 95 | m_vCardLine->addParameter(param: m_cache.fromLatin1(value: paramKey), value: m_cache.fromLatin1(value: paramValue)); |
| 96 | } |
| 97 | |
| 98 | void VCardLineParser::parseLine(const QByteArray ¤tLine, KContacts::VCardLine *vCardLine) |
| 99 | { |
| 100 | // qDebug() << currentLine; |
| 101 | m_vCardLine = vCardLine; |
| 102 | // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong. |
| 103 | // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234; |
| 104 | // Therefore we need a small state machine, just the way I like it. |
| 105 | enum State { |
| 106 | StateInitial, |
| 107 | StateParamKey, |
| 108 | StateParamValue, |
| 109 | StateQuotedValue, |
| 110 | StateAfterParamValue, |
| 111 | StateValue, |
| 112 | }; |
| 113 | State state = StateInitial; |
| 114 | const int lineLength = currentLine.length(); |
| 115 | const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode |
| 116 | QByteArray paramKey; |
| 117 | QByteArray paramValue; |
| 118 | int start = 0; |
| 119 | int pos = 0; |
| 120 | for (; pos < lineLength; ++pos) { |
| 121 | const char ch = lineData[pos]; |
| 122 | const bool colonOrSemicolon = (ch == ';' || ch == ':'); |
| 123 | switch (state) { |
| 124 | case StateInitial: |
| 125 | if (colonOrSemicolon) { |
| 126 | const QByteArray identifier = currentLine.mid(index: start, len: pos - start); |
| 127 | // qDebug() << " identifier" << identifier; |
| 128 | vCardLine->setIdentifier(m_cache.fromLatin1(value: identifier)); |
| 129 | start = pos + 1; |
| 130 | } |
| 131 | if (ch == ';') { |
| 132 | state = StateParamKey; |
| 133 | } else if (ch == ':') { |
| 134 | state = StateValue; |
| 135 | } else if (ch == '.') { |
| 136 | vCardLine->setGroup(m_cache.fromLatin1(value: currentLine.mid(index: start, len: pos - start))); |
| 137 | start = pos + 1; |
| 138 | } |
| 139 | break; |
| 140 | case StateParamKey: |
| 141 | if (colonOrSemicolon || ch == '=') { |
| 142 | paramKey = currentLine.mid(index: start, len: pos - start); |
| 143 | start = pos + 1; |
| 144 | } |
| 145 | if (colonOrSemicolon) { |
| 146 | // correct the so-called 2.1 'standard' |
| 147 | paramValue = paramKey; |
| 148 | const QByteArray lowerKey = paramKey.toLower(); |
| 149 | if (lowerKey == "quoted-printable" || lowerKey == "base64" ) { |
| 150 | paramKey = "encoding" ; |
| 151 | } else { |
| 152 | paramKey = "type" ; |
| 153 | } |
| 154 | addParameter(paramKey, paramValue); |
| 155 | } |
| 156 | if (ch == ';') { |
| 157 | state = StateParamKey; |
| 158 | } else if (ch == ':') { |
| 159 | state = StateValue; |
| 160 | } else if (ch == '=') { |
| 161 | state = StateParamValue; |
| 162 | } |
| 163 | break; |
| 164 | case StateQuotedValue: |
| 165 | if (ch == '"' || (ch == ',' && paramKey.toLower() == "type" )) { |
| 166 | // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec |
| 167 | paramValue = currentLine.mid(index: start, len: pos - start); |
| 168 | addParameter(paramKey: paramKey.toLower(), paramValue); |
| 169 | start = pos + 1; |
| 170 | if (ch == '"') { |
| 171 | state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char |
| 172 | } |
| 173 | } |
| 174 | break; |
| 175 | case StateParamValue: |
| 176 | if (colonOrSemicolon || ch == ',') { |
| 177 | paramValue = currentLine.mid(index: start, len: pos - start); |
| 178 | addParameter(paramKey: paramKey.toLower(), paramValue); |
| 179 | start = pos + 1; |
| 180 | } |
| 181 | // fall-through intended |
| 182 | Q_FALLTHROUGH(); |
| 183 | case StateAfterParamValue: |
| 184 | if (ch == ';') { |
| 185 | state = StateParamKey; |
| 186 | start = pos + 1; |
| 187 | } else if (ch == ':') { |
| 188 | state = StateValue; |
| 189 | } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec. |
| 190 | state = StateQuotedValue; |
| 191 | start = pos + 1; |
| 192 | } |
| 193 | break; |
| 194 | case StateValue: |
| 195 | Q_UNREACHABLE(); |
| 196 | break; |
| 197 | } |
| 198 | |
| 199 | if (state == StateValue) { |
| 200 | break; |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | if (state != StateValue) { // invalid line, no ':' |
| 205 | return; |
| 206 | } |
| 207 | |
| 208 | QByteArray value = currentLine.mid(index: pos + 1); |
| 209 | removeEscapes(str&: value); |
| 210 | |
| 211 | QByteArray output; |
| 212 | bool wasBase64Encoded = false; |
| 213 | |
| 214 | if (!m_encoding.isEmpty()) { |
| 215 | // have to decode the data |
| 216 | if (m_encoding == "b" || m_encoding == "base64" ) { |
| 217 | output = QByteArray::fromBase64(base64: value); |
| 218 | wasBase64Encoded = true; |
| 219 | } else if (m_encoding == "quoted-printable" ) { |
| 220 | // join any qp-folded lines |
| 221 | while (value.endsWith(c: '=')) { |
| 222 | value.chop(n: 1); // remove the '=' |
| 223 | value.append(a: m_fetchAnotherLine()); |
| 224 | } |
| 225 | KCodecs::quotedPrintableDecode(in: value, out&: output); |
| 226 | } else if (m_encoding == "8bit" ) { |
| 227 | output = value; |
| 228 | } else { |
| 229 | qDebug(msg: "Unknown vcard encoding type!" ); |
| 230 | } |
| 231 | } else { |
| 232 | output = value; |
| 233 | } |
| 234 | |
| 235 | if (!m_charset.isEmpty()) { |
| 236 | // have to convert the data |
| 237 | auto codec = QStringDecoder(m_charset.constData()); |
| 238 | if (codec.isValid()) { |
| 239 | vCardLine->setValue(QVariant::fromValue<QString>(value: codec.decode(ba: output))); |
| 240 | } else { |
| 241 | vCardLine->setValue(QString::fromUtf8(ba: output)); |
| 242 | } |
| 243 | } else if (wasBase64Encoded) { |
| 244 | vCardLine->setValue(output); |
| 245 | } else { |
| 246 | vCardLine->setValue(QString::fromUtf8(ba: output)); |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | //// |
| 251 | |
| 252 | VCardParser::VCardParser() |
| 253 | { |
| 254 | } |
| 255 | |
| 256 | VCardParser::~VCardParser() |
| 257 | { |
| 258 | } |
| 259 | |
| 260 | VCard::List VCardParser::parseVCards(const QByteArray &text) |
| 261 | { |
| 262 | VCard currentVCard; |
| 263 | VCard::List vCardList; |
| 264 | QByteArray currentLine; |
| 265 | |
| 266 | int lineStart = 0; |
| 267 | int lineEnd = text.indexOf(ch: '\n'); |
| 268 | |
| 269 | bool inVCard = false; |
| 270 | |
| 271 | StringCache cache; |
| 272 | for (; lineStart != text.size() + 1; |
| 273 | lineStart = lineEnd + 1, lineEnd = (text.indexOf(ch: '\n', from: lineStart) == -1) ? text.size() : text.indexOf(ch: '\n', from: lineStart)) { |
| 274 | QByteArray cur = text.mid(index: lineStart, len: lineEnd - lineStart); |
| 275 | // remove the trailing \r, left from \r\n |
| 276 | if (cur.endsWith(c: '\r')) { |
| 277 | cur.chop(n: 1); |
| 278 | } |
| 279 | |
| 280 | if (cur.startsWith(c: ' ') // |
| 281 | || cur.startsWith(c: '\t')) { // folded line => append to previous |
| 282 | currentLine.append(a: cur.mid(index: 1)); |
| 283 | continue; |
| 284 | } else { |
| 285 | if (cur.trimmed().isEmpty()) { // empty line |
| 286 | continue; |
| 287 | } |
| 288 | if (inVCard && !currentLine.isEmpty()) { // now parse the line |
| 289 | VCardLine vCardLine; |
| 290 | |
| 291 | // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support) |
| 292 | auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray { |
| 293 | const QByteArray ret = cur; |
| 294 | lineStart = lineEnd + 1; |
| 295 | lineEnd = text.indexOf(ch: '\n', from: lineStart); |
| 296 | if (lineEnd != -1) { |
| 297 | cur = text.mid(index: lineStart, len: lineEnd - lineStart); |
| 298 | // remove the trailing \r, left from \r\n |
| 299 | if (cur.endsWith(c: '\r')) { |
| 300 | cur.chop(n: 1); |
| 301 | } |
| 302 | } |
| 303 | return ret; |
| 304 | }; |
| 305 | |
| 306 | VCardLineParser lineParser(cache, fetchAnotherLine); |
| 307 | |
| 308 | lineParser.parseLine(currentLine, vCardLine: &vCardLine); |
| 309 | |
| 310 | currentVCard.addLine(line: vCardLine); |
| 311 | } |
| 312 | |
| 313 | // we do not save the start and end tag as vcardline |
| 314 | if (qstrnicmp(cur.constData(), "begin:vcard" , len: 11) == 0) { |
| 315 | inVCard = true; |
| 316 | currentLine.clear(); |
| 317 | currentVCard.clear(); // flush vcard |
| 318 | continue; |
| 319 | } |
| 320 | |
| 321 | if (qstrnicmp(cur.constData(), "end:vcard" , len: 9) == 0) { |
| 322 | inVCard = false; |
| 323 | vCardList.append(t: currentVCard); |
| 324 | currentLine.clear(); |
| 325 | currentVCard.clear(); // flush vcard |
| 326 | continue; |
| 327 | } |
| 328 | |
| 329 | currentLine = cur; |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | return vCardList; |
| 334 | } |
| 335 | |
| 336 | static const int FOLD_WIDTH = 75; |
| 337 | |
| 338 | QByteArray VCardParser::createVCards(const VCard::List &list) |
| 339 | { |
| 340 | QByteArray text; |
| 341 | QByteArray textLine; |
| 342 | QString encodingType; |
| 343 | QStringList params; |
| 344 | QStringList values; |
| 345 | |
| 346 | VCardLine::List lines; |
| 347 | |
| 348 | bool hasEncoding; |
| 349 | |
| 350 | text.reserve(asize: list.size() * 300); // reserve memory to be more efficient |
| 351 | |
| 352 | // iterate over the cards |
| 353 | for (const VCard &card : list) { |
| 354 | text.append(s: "BEGIN:VCARD\r\n" ); |
| 355 | |
| 356 | QStringList idents = card.identifiers(); |
| 357 | // VERSION must be first |
| 358 | if (idents.contains(str: QLatin1String("VERSION" ))) { |
| 359 | const QString str = idents.takeAt(i: idents.indexOf(needle: QLatin1String("VERSION" ))); |
| 360 | idents.prepend(t: str); |
| 361 | } |
| 362 | |
| 363 | for (const auto &id : std::as_const(t&: idents)) { |
| 364 | lines = card.lines(identifier: id); |
| 365 | |
| 366 | // iterate over the lines |
| 367 | for (const VCardLine &vline : std::as_const(t&: lines)) { |
| 368 | QVariant val = vline.value(); |
| 369 | if (val.isValid()) { |
| 370 | if (vline.hasGroup()) { |
| 371 | textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1(); |
| 372 | } else { |
| 373 | textLine = vline.identifier().toLatin1(); |
| 374 | } |
| 375 | |
| 376 | params = vline.parameterList(); |
| 377 | hasEncoding = false; |
| 378 | if (!params.isEmpty()) { // we have parameters |
| 379 | for (const QString ¶m : std::as_const(t&: params)) { |
| 380 | if (param == QLatin1String("encoding" )) { |
| 381 | hasEncoding = true; |
| 382 | encodingType = vline.parameter(QStringLiteral("encoding" )).toLower(); |
| 383 | } |
| 384 | |
| 385 | values = vline.parameters(param); |
| 386 | for (const QString &str : std::as_const(t&: values)) { |
| 387 | textLine.append(a: ';' + param.toLatin1().toUpper()); |
| 388 | if (!str.isEmpty()) { |
| 389 | textLine.append(a: '=' + str.toLatin1()); |
| 390 | } |
| 391 | } |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | QByteArray input; |
| 396 | QByteArray output; |
| 397 | bool checkMultibyte = false; // avoid splitting a multibyte character |
| 398 | |
| 399 | // handle charset |
| 400 | const QString charset = vline.parameter(QStringLiteral("charset" )); |
| 401 | if (!charset.isEmpty()) { |
| 402 | // have to convert the data |
| 403 | const QString value = vline.value().toString(); |
| 404 | auto codec = QStringEncoder(charset.toLatin1().constData()); |
| 405 | if (codec.isValid()) { |
| 406 | input = codec.encode(str: value); |
| 407 | } else { |
| 408 | checkMultibyte = true; |
| 409 | input = value.toUtf8(); |
| 410 | } |
| 411 | } else if (vline.value().userType() == QMetaType::QByteArray) { |
| 412 | input = vline.value().toByteArray(); |
| 413 | } else { |
| 414 | checkMultibyte = true; |
| 415 | input = vline.value().toString().toUtf8(); |
| 416 | } |
| 417 | |
| 418 | // handle encoding |
| 419 | if (hasEncoding) { // have to encode the data |
| 420 | if (encodingType == QLatin1Char('b')) { |
| 421 | checkMultibyte = false; |
| 422 | output = input.toBase64(); |
| 423 | } else if (encodingType == QLatin1String("quoted-printable" )) { |
| 424 | checkMultibyte = false; |
| 425 | KCodecs::quotedPrintableEncode(in: input, out&: output, useCRLF: false); |
| 426 | } |
| 427 | } else { |
| 428 | output = input; |
| 429 | } |
| 430 | addEscapes(str&: output, excludeEscapedComma: (vline.identifier() == QLatin1String("CATEGORIES" ) || vline.identifier() == QLatin1String("GEO" ))); |
| 431 | |
| 432 | if (!output.isEmpty()) { |
| 433 | textLine.append(a: ':' + output); |
| 434 | |
| 435 | if (textLine.length() > FOLD_WIDTH) { // we have to fold the line |
| 436 | if (checkMultibyte) { |
| 437 | // RFC 6350: Multi-octet characters MUST remain contiguous. |
| 438 | // we know that textLine contains UTF-8 encoded characters |
| 439 | int lineLength = 0; |
| 440 | for (int i = 0; i < textLine.length(); ++i) { |
| 441 | if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows |
| 442 | int sequenceLength = 2; |
| 443 | if ((textLine[i] & 0xE0) == 0xE0) { |
| 444 | sequenceLength = 3; |
| 445 | } else if ((textLine[i] & 0xF0) == 0xF0) { |
| 446 | sequenceLength = 4; |
| 447 | } |
| 448 | if ((lineLength + sequenceLength) > FOLD_WIDTH) { |
| 449 | // the current line would be too long. fold it |
| 450 | text += "\r\n " + textLine.mid(index: i, len: sequenceLength); |
| 451 | lineLength = 1 + sequenceLength; // incl. leading space |
| 452 | } else { |
| 453 | text += textLine.mid(index: i, len: sequenceLength); |
| 454 | lineLength += sequenceLength; |
| 455 | } |
| 456 | i += sequenceLength - 1; |
| 457 | } else { |
| 458 | text += textLine[i]; |
| 459 | ++lineLength; |
| 460 | } |
| 461 | if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) { |
| 462 | text += "\r\n " ; |
| 463 | lineLength = 1; // leading space |
| 464 | } |
| 465 | } |
| 466 | text += "\r\n" ; |
| 467 | } else { |
| 468 | for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) { |
| 469 | text.append(a: (i == 0 ? "" : " " ) + textLine.mid(index: i * FOLD_WIDTH, len: FOLD_WIDTH) + "\r\n" ); |
| 470 | } |
| 471 | } |
| 472 | } else { |
| 473 | text.append(a: textLine); |
| 474 | text.append(s: "\r\n" ); |
| 475 | } |
| 476 | } |
| 477 | } |
| 478 | } |
| 479 | } |
| 480 | |
| 481 | text.append(s: "END:VCARD\r\n" ); |
| 482 | text.append(s: "\r\n" ); |
| 483 | } |
| 484 | |
| 485 | return text; |
| 486 | } |
| 487 | |