1 | /* |
2 | SPDX-FileCopyrightText: 2004 Matt Douhan <matt@fruitsalad.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "kemailaddress.h" |
8 | #include "kcodecs.h" |
9 | #include "kcodecs_debug.h" |
10 | |
11 | #include <QRegularExpression> |
12 | |
13 | using namespace KEmailAddress; |
14 | |
15 | //----------------------------------------------------------------------------- |
16 | QStringList KEmailAddress::splitAddressList(const QString &aStr) |
17 | { |
18 | // Features: |
19 | // - always ignores quoted characters |
20 | // - ignores everything (including parentheses and commas) |
21 | // inside quoted strings |
22 | // - supports nested comments |
23 | // - ignores everything (including double quotes and commas) |
24 | // inside comments |
25 | |
26 | QStringList list; |
27 | |
28 | if (aStr.isEmpty()) { |
29 | return list; |
30 | } |
31 | |
32 | QString addr; |
33 | uint addrstart = 0; |
34 | int = 0; |
35 | bool insidequote = false; |
36 | |
37 | for (int index = 0; index < aStr.length(); index++) { |
38 | // the following conversion to latin1 is o.k. because |
39 | // we can safely ignore all non-latin1 characters |
40 | switch (aStr[index].toLatin1()) { |
41 | case '"': // start or end of quoted string |
42 | if (commentlevel == 0) { |
43 | insidequote = !insidequote; |
44 | } |
45 | break; |
46 | case '(': // start of comment |
47 | if (!insidequote) { |
48 | ++commentlevel; |
49 | } |
50 | break; |
51 | case ')': // end of comment |
52 | if (!insidequote) { |
53 | if (commentlevel > 0) { |
54 | --commentlevel; |
55 | } else { |
56 | return list; |
57 | } |
58 | } |
59 | break; |
60 | case '\\': // quoted character |
61 | index++; // ignore the quoted character |
62 | break; |
63 | case ',': |
64 | case ';': |
65 | if (!insidequote && (commentlevel == 0)) { |
66 | addr = aStr.mid(position: addrstart, n: index - addrstart); |
67 | if (!addr.isEmpty()) { |
68 | list += addr.trimmed(); |
69 | } |
70 | addrstart = index + 1; |
71 | } |
72 | break; |
73 | } |
74 | } |
75 | // append the last address to the list |
76 | if (!insidequote && (commentlevel == 0)) { |
77 | addr = aStr.mid(position: addrstart, n: aStr.length() - addrstart); |
78 | if (!addr.isEmpty()) { |
79 | list += addr.trimmed(); |
80 | } |
81 | } |
82 | |
83 | return list; |
84 | } |
85 | |
86 | //----------------------------------------------------------------------------- |
87 | // Used by KEmailAddress::splitAddress(...) and KEmailAddress::firstEmailAddress(...). |
88 | KEmailAddress::EmailParseResult |
89 | splitAddressInternal(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &, bool allowMultipleAddresses) |
90 | { |
91 | // qCDebug(KCODECS_LOG) << "address"; |
92 | // 110 is at time of writing a step in qbytearrays growth curve, and it is |
93 | // unlikely that displayname nor addrSpec will be larger than that |
94 | // but neither addrSpec nor displayName will be larger than the input. |
95 | displayName.clear(); |
96 | displayName.reserve(asize: std::min<qsizetype>(a: 110, b: address.size())); |
97 | addrSpec.clear(); |
98 | addrSpec.reserve(asize: std::min<qsizetype>(a: 110, b: address.size())); |
99 | comment.clear(); // comments are uncommon, so no need to reserve up front |
100 | |
101 | if (address.isEmpty()) { |
102 | return AddressEmpty; |
103 | } |
104 | |
105 | // The following is a primitive parser for a mailbox-list (cf. RFC 2822). |
106 | // The purpose is to extract a displayable string from the mailboxes. |
107 | // Comments in the addr-spec are not handled. No error checking is done. |
108 | |
109 | enum { |
110 | TopLevel, |
111 | , |
112 | InAngleAddress, |
113 | } context = TopLevel; |
114 | bool inQuotedString = false; |
115 | int = 0; |
116 | bool stop = false; |
117 | |
118 | for (const char *p = address.data(); *p && !stop; ++p) { |
119 | switch (context) { |
120 | case TopLevel: { |
121 | switch (*p) { |
122 | case '"': |
123 | inQuotedString = !inQuotedString; |
124 | displayName += *p; |
125 | break; |
126 | case '(': |
127 | if (!inQuotedString) { |
128 | context = InComment; |
129 | commentLevel = 1; |
130 | } else { |
131 | displayName += *p; |
132 | } |
133 | break; |
134 | case '<': |
135 | if (!inQuotedString) { |
136 | context = InAngleAddress; |
137 | } else { |
138 | displayName += *p; |
139 | } |
140 | break; |
141 | case '\\': // quoted character |
142 | displayName += *p; |
143 | ++p; // skip the '\' |
144 | if (*p) { |
145 | displayName += *p; |
146 | } else { |
147 | return UnexpectedEnd; |
148 | } |
149 | break; |
150 | case ',': |
151 | if (!inQuotedString) { |
152 | if (allowMultipleAddresses) { |
153 | stop = true; |
154 | } else { |
155 | return UnexpectedComma; |
156 | } |
157 | } else { |
158 | displayName += *p; |
159 | } |
160 | break; |
161 | default: |
162 | displayName += *p; |
163 | } |
164 | break; |
165 | } |
166 | case InComment: { |
167 | switch (*p) { |
168 | case '(': |
169 | ++commentLevel; |
170 | comment += *p; |
171 | break; |
172 | case ')': |
173 | --commentLevel; |
174 | if (commentLevel == 0) { |
175 | context = TopLevel; |
176 | comment += ' '; // separate the text of several comments |
177 | } else { |
178 | comment += *p; |
179 | } |
180 | break; |
181 | case '\\': // quoted character |
182 | comment += *p; |
183 | ++p; // skip the '\' |
184 | if (*p) { |
185 | comment += *p; |
186 | } else { |
187 | return UnexpectedEnd; |
188 | } |
189 | break; |
190 | default: |
191 | comment += *p; |
192 | } |
193 | break; |
194 | } |
195 | case InAngleAddress: { |
196 | switch (*p) { |
197 | case '"': |
198 | inQuotedString = !inQuotedString; |
199 | addrSpec += *p; |
200 | break; |
201 | case '>': |
202 | if (!inQuotedString) { |
203 | context = TopLevel; |
204 | } else { |
205 | addrSpec += *p; |
206 | } |
207 | break; |
208 | case '\\': // quoted character |
209 | addrSpec += *p; |
210 | ++p; // skip the '\' |
211 | if (*p) { |
212 | addrSpec += *p; |
213 | } else { |
214 | return UnexpectedEnd; |
215 | } |
216 | break; |
217 | default: |
218 | addrSpec += *p; |
219 | } |
220 | break; |
221 | } |
222 | } // switch ( context ) |
223 | } |
224 | // check for errors |
225 | if (inQuotedString) { |
226 | return UnbalancedQuote; |
227 | } |
228 | if (context == InComment) { |
229 | return UnbalancedParens; |
230 | } |
231 | if (context == InAngleAddress) { |
232 | return UnclosedAngleAddr; |
233 | } |
234 | |
235 | displayName = std::move(displayName).trimmed(); |
236 | comment = std::move(comment).trimmed(); |
237 | addrSpec = std::move(addrSpec).trimmed(); |
238 | |
239 | if (addrSpec.isEmpty()) { |
240 | if (displayName.isEmpty()) { |
241 | return NoAddressSpec; |
242 | } else { |
243 | addrSpec = displayName; |
244 | displayName.truncate(pos: 0); |
245 | } |
246 | } |
247 | /* |
248 | qCDebug(KCODECS_LOG) << "display-name : \"" << displayName << "\""; |
249 | qCDebug(KCODECS_LOG) << "comment : \"" << comment << "\""; |
250 | qCDebug(KCODECS_LOG) << "addr-spec : \"" << addrSpec << "\""; |
251 | */ |
252 | return AddressOk; |
253 | } |
254 | |
255 | //----------------------------------------------------------------------------- |
256 | EmailParseResult KEmailAddress::splitAddress(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &) |
257 | { |
258 | return splitAddressInternal(address, displayName, addrSpec, comment, allowMultipleAddresses: false /* don't allow multiple addresses */); |
259 | } |
260 | |
261 | //----------------------------------------------------------------------------- |
262 | EmailParseResult KEmailAddress::splitAddress(const QString &address, QString &displayName, QString &addrSpec, QString &) |
263 | { |
264 | QByteArray d; |
265 | QByteArray a; |
266 | QByteArray c; |
267 | // FIXME: toUtf8() is probably not safe here, what if the second byte of a multi-byte character |
268 | // has the same code as one of the ASCII characters that splitAddress uses as delimiters? |
269 | EmailParseResult result = splitAddress(address: address.toUtf8(), displayName&: d, addrSpec&: a, comment&: c); |
270 | |
271 | if (result == AddressOk) { |
272 | displayName = QString::fromUtf8(ba: d); |
273 | addrSpec = QString::fromUtf8(ba: a); |
274 | comment = QString::fromUtf8(ba: c); |
275 | } |
276 | return result; |
277 | } |
278 | |
279 | //----------------------------------------------------------------------------- |
280 | EmailParseResult KEmailAddress::isValidAddress(const QString &aStr) |
281 | { |
282 | // If we are passed an empty string bail right away no need to process |
283 | // further and waste resources |
284 | if (aStr.isEmpty()) { |
285 | return AddressEmpty; |
286 | } |
287 | |
288 | // count how many @'s are in the string that is passed to us |
289 | // if 0 or > 1 take action |
290 | // at this point to many @'s cannot bail out right away since |
291 | // @ is allowed in quotes, so we use a bool to keep track |
292 | // and then make a judgment further down in the parser |
293 | |
294 | bool tooManyAtsFlag = false; |
295 | |
296 | int atCount = aStr.count(c: QLatin1Char('@')); |
297 | if (atCount > 1) { |
298 | tooManyAtsFlag = true; |
299 | } else if (atCount == 0) { |
300 | return TooFewAts; |
301 | } |
302 | |
303 | int dotCount = aStr.count(c: QLatin1Char('.')); |
304 | |
305 | // The main parser, try and catch all weird and wonderful |
306 | // mistakes users and/or machines can create |
307 | |
308 | enum { |
309 | TopLevel, |
310 | , |
311 | InAngleAddress, |
312 | } context = TopLevel; |
313 | bool inQuotedString = false; |
314 | int = 0; |
315 | |
316 | unsigned int strlen = aStr.length(); |
317 | |
318 | for (unsigned int index = 0; index < strlen; index++) { |
319 | switch (context) { |
320 | case TopLevel: { |
321 | switch (aStr[index].toLatin1()) { |
322 | case '"': |
323 | inQuotedString = !inQuotedString; |
324 | break; |
325 | case '(': |
326 | if (!inQuotedString) { |
327 | context = InComment; |
328 | commentLevel = 1; |
329 | } |
330 | break; |
331 | case '[': |
332 | if (!inQuotedString) { |
333 | return InvalidDisplayName; |
334 | } |
335 | break; |
336 | case ']': |
337 | if (!inQuotedString) { |
338 | return InvalidDisplayName; |
339 | } |
340 | break; |
341 | case ':': |
342 | if (!inQuotedString) { |
343 | return DisallowedChar; |
344 | } |
345 | break; |
346 | case '<': |
347 | if (!inQuotedString) { |
348 | context = InAngleAddress; |
349 | } |
350 | break; |
351 | case '\\': // quoted character |
352 | ++index; // skip the '\' |
353 | if ((index + 1) > strlen) { |
354 | return UnexpectedEnd; |
355 | } |
356 | break; |
357 | case ',': |
358 | if (!inQuotedString) { |
359 | return UnexpectedComma; |
360 | } |
361 | break; |
362 | case ')': |
363 | if (!inQuotedString) { |
364 | return UnbalancedParens; |
365 | } |
366 | break; |
367 | case '>': |
368 | if (!inQuotedString) { |
369 | return UnopenedAngleAddr; |
370 | } |
371 | break; |
372 | case '@': |
373 | if (!inQuotedString) { |
374 | if (index == 0) { // Missing local part |
375 | return MissingLocalPart; |
376 | } else if (index == strlen - 1) { |
377 | return MissingDomainPart; |
378 | } |
379 | } else { |
380 | --atCount; |
381 | if (atCount == 1) { |
382 | tooManyAtsFlag = false; |
383 | } |
384 | } |
385 | break; |
386 | case '.': |
387 | if (inQuotedString) { |
388 | --dotCount; |
389 | } |
390 | break; |
391 | } |
392 | break; |
393 | } |
394 | case InComment: { |
395 | switch (aStr[index].toLatin1()) { |
396 | case '(': |
397 | ++commentLevel; |
398 | break; |
399 | case ')': |
400 | --commentLevel; |
401 | if (commentLevel == 0) { |
402 | context = TopLevel; |
403 | } |
404 | break; |
405 | case '\\': // quoted character |
406 | ++index; // skip the '\' |
407 | if ((index + 1) > strlen) { |
408 | return UnexpectedEnd; |
409 | } |
410 | break; |
411 | } |
412 | break; |
413 | } |
414 | |
415 | case InAngleAddress: { |
416 | switch (aStr[index].toLatin1()) { |
417 | case ',': |
418 | if (!inQuotedString) { |
419 | return UnexpectedComma; |
420 | } |
421 | break; |
422 | case '"': |
423 | inQuotedString = !inQuotedString; |
424 | break; |
425 | case '@': |
426 | if (inQuotedString) { |
427 | --atCount; |
428 | } |
429 | if (atCount == 1) { |
430 | tooManyAtsFlag = false; |
431 | } |
432 | break; |
433 | case '.': |
434 | if (inQuotedString) { |
435 | --dotCount; |
436 | } |
437 | break; |
438 | case '>': |
439 | if (!inQuotedString) { |
440 | context = TopLevel; |
441 | break; |
442 | } |
443 | break; |
444 | case '\\': // quoted character |
445 | ++index; // skip the '\' |
446 | if ((index + 1) > strlen) { |
447 | return UnexpectedEnd; |
448 | } |
449 | break; |
450 | } |
451 | break; |
452 | } |
453 | } |
454 | } |
455 | |
456 | if (dotCount == 0 && !inQuotedString) { |
457 | return TooFewDots; |
458 | } |
459 | |
460 | if (atCount == 0 && !inQuotedString) { |
461 | return TooFewAts; |
462 | } |
463 | |
464 | if (inQuotedString) { |
465 | return UnbalancedQuote; |
466 | } |
467 | |
468 | if (context == InComment) { |
469 | return UnbalancedParens; |
470 | } |
471 | |
472 | if (context == InAngleAddress) { |
473 | return UnclosedAngleAddr; |
474 | } |
475 | |
476 | if (tooManyAtsFlag) { |
477 | return TooManyAts; |
478 | } |
479 | |
480 | return AddressOk; |
481 | } |
482 | |
483 | //----------------------------------------------------------------------------- |
484 | KEmailAddress::EmailParseResult KEmailAddress::isValidAddressList(const QString &aStr, QString &badAddr) |
485 | { |
486 | if (aStr.isEmpty()) { |
487 | return AddressEmpty; |
488 | } |
489 | |
490 | const QStringList list = splitAddressList(aStr); |
491 | EmailParseResult errorCode = AddressOk; |
492 | auto it = std::find_if(first: list.cbegin(), last: list.cend(), pred: [&errorCode](const QString &addr) { |
493 | qCDebug(KCODECS_LOG) << " address" << addr; |
494 | errorCode = isValidAddress(aStr: addr); |
495 | return errorCode != AddressOk; |
496 | }); |
497 | if (it != list.cend()) { |
498 | badAddr = *it; |
499 | } |
500 | return errorCode; |
501 | } |
502 | |
503 | //----------------------------------------------------------------------------- |
504 | QString KEmailAddress::emailParseResultToString(EmailParseResult errorCode) |
505 | { |
506 | switch (errorCode) { |
507 | case TooManyAts: |
508 | return QObject::tr( |
509 | s: "The email address you entered is not valid because it " |
510 | "contains more than one @.\n" |
511 | "You will not create valid messages if you do not " |
512 | "change your address." ); |
513 | case TooFewAts: |
514 | return QObject::tr( |
515 | s: "The email address you entered is not valid because it " |
516 | "does not contain a @.\n" |
517 | "You will not create valid messages if you do not " |
518 | "change your address." ); |
519 | case AddressEmpty: |
520 | return QObject::tr(s: "You have to enter something in the email address field." ); |
521 | case MissingLocalPart: |
522 | return QObject::tr( |
523 | s: "The email address you entered is not valid because it " |
524 | "does not contain a local part." ); |
525 | case MissingDomainPart: |
526 | return QObject::tr( |
527 | s: "The email address you entered is not valid because it " |
528 | "does not contain a domain part." ); |
529 | case UnbalancedParens: |
530 | return QObject::tr( |
531 | s: "The email address you entered is not valid because it " |
532 | "contains unclosed comments/brackets." ); |
533 | case AddressOk: |
534 | return QObject::tr(s: "The email address you entered is valid." ); |
535 | case UnclosedAngleAddr: |
536 | return QObject::tr( |
537 | s: "The email address you entered is not valid because it " |
538 | "contains an unclosed angle bracket." ); |
539 | case UnopenedAngleAddr: |
540 | return QObject::tr( |
541 | s: "The email address you entered is not valid because it " |
542 | "contains too many closing angle brackets." ); |
543 | case UnexpectedComma: |
544 | return QObject::tr( |
545 | s: "The email address you have entered is not valid because it " |
546 | "contains an unexpected comma." ); |
547 | case UnexpectedEnd: |
548 | return QObject::tr( |
549 | s: "The email address you entered is not valid because it ended " |
550 | "unexpectedly.\nThis probably means you have used an escaping " |
551 | "type character like a '\\' as the last character in your " |
552 | "email address." ); |
553 | case UnbalancedQuote: |
554 | return QObject::tr( |
555 | s: "The email address you entered is not valid because it " |
556 | "contains quoted text which does not end." ); |
557 | case NoAddressSpec: |
558 | return QObject::tr( |
559 | s: "The email address you entered is not valid because it " |
560 | "does not seem to contain an actual email address, i.e. " |
561 | "something of the form joe@example.org." ); |
562 | case DisallowedChar: |
563 | return QObject::tr( |
564 | s: "The email address you entered is not valid because it " |
565 | "contains an illegal character." ); |
566 | case InvalidDisplayName: |
567 | return QObject::tr( |
568 | s: "The email address you have entered is not valid because it " |
569 | "contains an invalid display name." ); |
570 | case TooFewDots: |
571 | return QObject::tr( |
572 | s: "The email address you entered is not valid because it " |
573 | "does not contain a \'.\'.\n" |
574 | "You will not create valid messages if you do not " |
575 | "change your address." ); |
576 | } |
577 | return QObject::tr(s: "Unknown problem with email address" ); |
578 | } |
579 | |
580 | //----------------------------------------------------------------------------- |
581 | bool KEmailAddress::isValidSimpleAddress(const QString &aStr) |
582 | { |
583 | // If we are passed an empty string bail right away no need to process further |
584 | // and waste resources |
585 | if (aStr.isEmpty()) { |
586 | return false; |
587 | } |
588 | |
589 | int atChar = aStr.lastIndexOf(c: QLatin1Char('@')); |
590 | QString domainPart = aStr.mid(position: atChar + 1); |
591 | QString localPart = aStr.left(n: atChar); |
592 | |
593 | // Both of these parts must be non empty |
594 | // after all we cannot have emails like: |
595 | // @kde.org, or foo@ |
596 | if (localPart.isEmpty() || domainPart.isEmpty()) { |
597 | return false; |
598 | } |
599 | |
600 | bool inQuotedString = false; |
601 | int atCount = localPart.count(c: QLatin1Char('@')); |
602 | |
603 | unsigned int strlen = localPart.length(); |
604 | for (unsigned int index = 0; index < strlen; index++) { |
605 | switch (localPart[index].toLatin1()) { |
606 | case '"': |
607 | inQuotedString = !inQuotedString; |
608 | break; |
609 | case '@': |
610 | if (inQuotedString) { |
611 | --atCount; |
612 | } |
613 | break; |
614 | } |
615 | } |
616 | |
617 | QString addrRx; |
618 | |
619 | if (localPart[0] == QLatin1Char('\"') || localPart[localPart.length() - 1] == QLatin1Char('\"')) { |
620 | addrRx = QStringLiteral("\"[a-zA-Z@]*[\\w.@-]*[a-zA-Z0-9@]\"@" ); |
621 | } else { |
622 | addrRx = QStringLiteral("[a-zA-Z]*[~|{}`\\^?=/+*'&%$#!_\\w.-]*[~|{}`\\^?=/+*'&%$#!_a-zA-Z0-9-]@" ); |
623 | } |
624 | if (domainPart[0] == QLatin1Char('[') || domainPart[domainPart.length() - 1] == QLatin1Char(']')) { |
625 | addrRx += QStringLiteral("\\[[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]" ); |
626 | } else { |
627 | addrRx += QStringLiteral("[\\w#-]+(\\.[\\w#-]+)*" ); |
628 | } |
629 | |
630 | const QRegularExpression rx(QRegularExpression::anchoredPattern(expression: addrRx), QRegularExpression::UseUnicodePropertiesOption); |
631 | return rx.match(subject: aStr).hasMatch(); |
632 | } |
633 | |
634 | //----------------------------------------------------------------------------- |
635 | QString KEmailAddress::simpleEmailAddressErrorMsg() |
636 | { |
637 | return QObject::tr( |
638 | s: "The email address you entered is not valid.\nIt " |
639 | "does not seem to contain an actual email address, i.e. " |
640 | "something of the form joe@example.org." ); |
641 | } |
642 | |
643 | //----------------------------------------------------------------------------- |
644 | QByteArray KEmailAddress::(const QByteArray &address) |
645 | { |
646 | QString errorMessage; |
647 | return extractEmailAddress(address, errorMessage); |
648 | } |
649 | |
650 | QByteArray KEmailAddress::(const QByteArray &address, QString &errorMessage) |
651 | { |
652 | QByteArray dummy1; |
653 | QByteArray dummy2; |
654 | QByteArray addrSpec; |
655 | const EmailParseResult result = splitAddressInternal(address, displayName&: dummy1, addrSpec, comment&: dummy2, allowMultipleAddresses: false /* don't allow multiple addresses */); |
656 | if (result != AddressOk) { |
657 | addrSpec = QByteArray(); |
658 | if (result != AddressEmpty) { |
659 | errorMessage = emailParseResultToString(errorCode: result); |
660 | qCDebug(KCODECS_LOG) << "Input:" << address << "\nError:" << errorMessage; |
661 | } |
662 | } else { |
663 | errorMessage.clear(); |
664 | } |
665 | |
666 | return addrSpec; |
667 | } |
668 | |
669 | //----------------------------------------------------------------------------- |
670 | QString KEmailAddress::(const QString &address) |
671 | { |
672 | QString errorMessage; |
673 | return extractEmailAddress(address, errorMessage); |
674 | } |
675 | |
676 | QString KEmailAddress::(const QString &address, QString &errorMessage) |
677 | { |
678 | return QString::fromUtf8(ba: extractEmailAddress(address: address.toUtf8(), errorMessage)); |
679 | } |
680 | |
681 | //----------------------------------------------------------------------------- |
682 | QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses) |
683 | { |
684 | QString errorMessage; |
685 | return firstEmailAddress(addresses, errorMessage); |
686 | } |
687 | |
688 | QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses, QString &errorMessage) |
689 | { |
690 | QByteArray dummy1; |
691 | QByteArray dummy2; |
692 | QByteArray addrSpec; |
693 | const EmailParseResult result = splitAddressInternal(address: addresses, displayName&: dummy1, addrSpec, comment&: dummy2, allowMultipleAddresses: true /* allow multiple addresses */); |
694 | if (result != AddressOk) { |
695 | addrSpec = QByteArray(); |
696 | if (result != AddressEmpty) { |
697 | errorMessage = emailParseResultToString(errorCode: result); |
698 | qCDebug(KCODECS_LOG) << "Input: aStr\nError:" << errorMessage; |
699 | } |
700 | } else { |
701 | errorMessage.clear(); |
702 | } |
703 | |
704 | return addrSpec; |
705 | } |
706 | |
707 | //----------------------------------------------------------------------------- |
708 | QString KEmailAddress::firstEmailAddress(const QString &addresses) |
709 | { |
710 | QString errorMessage; |
711 | return firstEmailAddress(addresses, errorMessage); |
712 | } |
713 | |
714 | QString KEmailAddress::firstEmailAddress(const QString &addresses, QString &errorMessage) |
715 | { |
716 | return QString::fromUtf8(ba: firstEmailAddress(addresses: addresses.toUtf8(), errorMessage)); |
717 | } |
718 | |
719 | //----------------------------------------------------------------------------- |
720 | bool KEmailAddress::extractEmailAddressAndName(const QString &aStr, QString &mail, QString &name) |
721 | { |
722 | name.clear(); |
723 | mail.clear(); |
724 | |
725 | const int len = aStr.length(); |
726 | const char cQuotes = '"'; |
727 | |
728 | bool = false; |
729 | bool bInQuotesOutsideOfEmail = false; |
730 | int i = 0; |
731 | int iAd = 0; |
732 | int iMailStart = 0; |
733 | int iMailEnd = 0; |
734 | QChar c; |
735 | unsigned int = 0; |
736 | |
737 | // Find the '@' of the email address |
738 | // skipping all '@' inside "(...)" comments: |
739 | while (i < len) { |
740 | c = aStr[i]; |
741 | if (QLatin1Char('(') == c) { |
742 | ++commentstack; |
743 | } |
744 | if (QLatin1Char(')') == c) { |
745 | --commentstack; |
746 | } |
747 | bInComment = commentstack != 0; |
748 | if (QLatin1Char('"') == c && !bInComment) { |
749 | bInQuotesOutsideOfEmail = !bInQuotesOutsideOfEmail; |
750 | } |
751 | |
752 | if (!bInComment && !bInQuotesOutsideOfEmail) { |
753 | if (QLatin1Char('@') == c) { |
754 | iAd = i; |
755 | break; // found it |
756 | } |
757 | } |
758 | ++i; |
759 | } |
760 | |
761 | if (!iAd) { |
762 | // We suppose the user is typing the string manually and just |
763 | // has not finished typing the mail address part. |
764 | // So we take everything that's left of the '<' as name and the rest as mail |
765 | for (i = 0; len > i; ++i) { |
766 | c = aStr[i]; |
767 | if (QLatin1Char('<') != c) { |
768 | name.append(c); |
769 | } else { |
770 | break; |
771 | } |
772 | } |
773 | mail = aStr.mid(position: i + 1); |
774 | if (mail.endsWith(c: QLatin1Char('>'))) { |
775 | mail.truncate(pos: mail.length() - 1); |
776 | } |
777 | |
778 | } else { |
779 | // Loop backwards until we find the start of the string |
780 | // or a ',' that is outside of a comment |
781 | // and outside of quoted text before the leading '<'. |
782 | bInComment = false; |
783 | bInQuotesOutsideOfEmail = false; |
784 | for (i = iAd - 1; 0 <= i; --i) { |
785 | c = aStr[i]; |
786 | if (bInComment) { |
787 | if (QLatin1Char('(') == c) { |
788 | if (!name.isEmpty()) { |
789 | name.prepend(c: QLatin1Char(' ')); |
790 | } |
791 | bInComment = false; |
792 | } else { |
793 | name.prepend(c); // all comment stuff is part of the name |
794 | } |
795 | } else if (bInQuotesOutsideOfEmail) { |
796 | if (QLatin1Char(cQuotes) == c) { |
797 | bInQuotesOutsideOfEmail = false; |
798 | } else if (c != QLatin1Char('\\')) { |
799 | name.prepend(c); |
800 | } |
801 | } else { |
802 | // found the start of this addressee ? |
803 | if (QLatin1Char(',') == c) { |
804 | break; |
805 | } |
806 | // stuff is before the leading '<' ? |
807 | if (iMailStart) { |
808 | if (QLatin1Char(cQuotes) == c) { |
809 | bInQuotesOutsideOfEmail = true; // end of quoted text found |
810 | } else { |
811 | name.prepend(c); |
812 | } |
813 | } else { |
814 | switch (c.toLatin1()) { |
815 | case '<': |
816 | iMailStart = i; |
817 | break; |
818 | case ')': |
819 | if (!name.isEmpty()) { |
820 | name.prepend(c: QLatin1Char(' ')); |
821 | } |
822 | bInComment = true; |
823 | break; |
824 | default: |
825 | if (QLatin1Char(' ') != c) { |
826 | mail.prepend(c); |
827 | } |
828 | } |
829 | } |
830 | } |
831 | } |
832 | |
833 | name = name.simplified(); |
834 | mail = mail.simplified(); |
835 | |
836 | if (mail.isEmpty()) { |
837 | return false; |
838 | } |
839 | |
840 | mail.append(c: QLatin1Char('@')); |
841 | |
842 | // Loop forward until we find the end of the string |
843 | // or a ',' that is outside of a comment |
844 | // and outside of quoted text behind the trailing '>'. |
845 | bInComment = false; |
846 | bInQuotesOutsideOfEmail = false; |
847 | int parenthesesNesting = 0; |
848 | for (i = iAd + 1; len > i; ++i) { |
849 | c = aStr[i]; |
850 | if (bInComment) { |
851 | if (QLatin1Char(')') == c) { |
852 | if (--parenthesesNesting == 0) { |
853 | bInComment = false; |
854 | if (!name.isEmpty()) { |
855 | name.append(c: QLatin1Char(' ')); |
856 | } |
857 | } else { |
858 | // nested ")", add it |
859 | name.append(c: QLatin1Char(')')); // name can't be empty here |
860 | } |
861 | } else { |
862 | if (QLatin1Char('(') == c) { |
863 | // nested "(" |
864 | ++parenthesesNesting; |
865 | } |
866 | name.append(c); // all comment stuff is part of the name |
867 | } |
868 | } else if (bInQuotesOutsideOfEmail) { |
869 | if (QLatin1Char(cQuotes) == c) { |
870 | bInQuotesOutsideOfEmail = false; |
871 | } else if (c != QLatin1Char('\\')) { |
872 | name.append(c); |
873 | } |
874 | } else { |
875 | // found the end of this addressee ? |
876 | if (QLatin1Char(',') == c) { |
877 | break; |
878 | } |
879 | // stuff is behind the trailing '>' ? |
880 | if (iMailEnd) { |
881 | if (QLatin1Char(cQuotes) == c) { |
882 | bInQuotesOutsideOfEmail = true; // start of quoted text found |
883 | } else { |
884 | name.append(c); |
885 | } |
886 | } else { |
887 | switch (c.toLatin1()) { |
888 | case '>': |
889 | iMailEnd = i; |
890 | break; |
891 | case '(': |
892 | if (!name.isEmpty()) { |
893 | name.append(c: QLatin1Char(' ')); |
894 | } |
895 | if (++parenthesesNesting > 0) { |
896 | bInComment = true; |
897 | } |
898 | break; |
899 | default: |
900 | if (QLatin1Char(' ') != c) { |
901 | mail.append(c); |
902 | } |
903 | } |
904 | } |
905 | } |
906 | } |
907 | } |
908 | |
909 | name = name.simplified(); |
910 | mail = mail.simplified(); |
911 | |
912 | return !(name.isEmpty() || mail.isEmpty()); |
913 | } |
914 | |
915 | //----------------------------------------------------------------------------- |
916 | bool KEmailAddress::compareEmail(const QString &email1, const QString &email2, bool matchName) |
917 | { |
918 | QString e1Name; |
919 | QString e1Email; |
920 | QString e2Name; |
921 | QString e2Email; |
922 | |
923 | extractEmailAddressAndName(aStr: email1, mail&: e1Email, name&: e1Name); |
924 | extractEmailAddressAndName(aStr: email2, mail&: e2Email, name&: e2Name); |
925 | |
926 | return e1Email == e2Email && (!matchName || (e1Name == e2Name)); |
927 | } |
928 | |
929 | //----------------------------------------------------------------------------- |
930 | // Used internally by normalizedAddress() |
931 | QString removeBidiControlChars(const QString &input) |
932 | { |
933 | constexpr QChar LRO(0x202D); |
934 | constexpr QChar RLO(0x202E); |
935 | constexpr QChar LRE(0x202A); |
936 | constexpr QChar RLE(0x202B); |
937 | QString result = input; |
938 | result.remove(c: LRO); |
939 | result.remove(c: RLO); |
940 | result.remove(c: LRE); |
941 | result.remove(c: RLE); |
942 | return result; |
943 | } |
944 | |
945 | QString KEmailAddress::normalizedAddress(const QString &displayName, const QString &addrSpec, const QString &) |
946 | { |
947 | const QString realDisplayName = removeBidiControlChars(input: displayName); |
948 | if (realDisplayName.isEmpty() && comment.isEmpty()) { |
949 | return addrSpec; |
950 | } else if (comment.isEmpty()) { |
951 | if (!realDisplayName.startsWith(c: QLatin1Char('\"'))) { |
952 | return quoteNameIfNecessary(str: realDisplayName) + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
953 | } else { |
954 | return realDisplayName + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
955 | } |
956 | } else if (realDisplayName.isEmpty()) { |
957 | return quoteNameIfNecessary(str: comment) + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
958 | } else { |
959 | return realDisplayName + QLatin1String(" (" ) + comment + QLatin1String(") <" ) + addrSpec + QLatin1Char('>'); |
960 | } |
961 | } |
962 | |
963 | //----------------------------------------------------------------------------- |
964 | QString KEmailAddress::fromIdn(const QString &addrSpec) |
965 | { |
966 | const int atPos = addrSpec.lastIndexOf(c: QLatin1Char('@')); |
967 | if (atPos == -1) { |
968 | return addrSpec; |
969 | } |
970 | |
971 | QString idn = QUrl::fromAce(domain: addrSpec.mid(position: atPos + 1).toLatin1()); |
972 | if (idn.isEmpty()) { |
973 | return QString(); |
974 | } |
975 | |
976 | return addrSpec.left(n: atPos + 1) + idn; |
977 | } |
978 | |
979 | //----------------------------------------------------------------------------- |
980 | QString KEmailAddress::toIdn(const QString &addrSpec) |
981 | { |
982 | const int atPos = addrSpec.lastIndexOf(c: QLatin1Char('@')); |
983 | if (atPos == -1) { |
984 | return addrSpec; |
985 | } |
986 | |
987 | QString idn = QLatin1String(QUrl::toAce(domain: addrSpec.mid(position: atPos + 1))); |
988 | if (idn.isEmpty()) { |
989 | return addrSpec; |
990 | } |
991 | |
992 | return addrSpec.left(n: atPos + 1) + idn; |
993 | } |
994 | |
995 | //----------------------------------------------------------------------------- |
996 | QString KEmailAddress::normalizeAddressesAndDecodeIdn(const QString &str) |
997 | { |
998 | // qCDebug(KCODECS_LOG) << str; |
999 | if (str.isEmpty()) { |
1000 | return str; |
1001 | } |
1002 | |
1003 | const QStringList addressList = splitAddressList(aStr: str); |
1004 | QStringList normalizedAddressList; |
1005 | |
1006 | QByteArray displayName; |
1007 | QByteArray addrSpec; |
1008 | QByteArray ; |
1009 | |
1010 | for (const auto &addr : addressList) { |
1011 | if (!addr.isEmpty()) { |
1012 | if (splitAddress(address: addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) { |
1013 | QByteArray cs; |
1014 | displayName = KCodecs::decodeRFC2047String(src: displayName, usedCS: &cs).toUtf8(); |
1015 | comment = KCodecs::decodeRFC2047String(src: comment, usedCS: &cs).toUtf8(); |
1016 | |
1017 | normalizedAddressList << normalizedAddress(displayName: QString::fromUtf8(ba: displayName), addrSpec: fromIdn(addrSpec: QString::fromUtf8(ba: addrSpec)), comment: QString::fromUtf8(ba: comment)); |
1018 | } |
1019 | } |
1020 | } |
1021 | /* |
1022 | qCDebug(KCODECS_LOG) << "normalizedAddressList: \"" |
1023 | << normalizedAddressList.join( ", " ) |
1024 | << "\""; |
1025 | */ |
1026 | return normalizedAddressList.join(QStringLiteral(", " )); |
1027 | } |
1028 | |
1029 | //----------------------------------------------------------------------------- |
1030 | QString KEmailAddress::normalizeAddressesAndEncodeIdn(const QString &str) |
1031 | { |
1032 | // qCDebug(KCODECS_LOG) << str; |
1033 | if (str.isEmpty()) { |
1034 | return str; |
1035 | } |
1036 | |
1037 | const QStringList addressList = splitAddressList(aStr: str); |
1038 | QStringList normalizedAddressList; |
1039 | |
1040 | QByteArray displayName; |
1041 | QByteArray addrSpec; |
1042 | QByteArray ; |
1043 | |
1044 | for (const auto &addr : addressList) { |
1045 | if (!addr.isEmpty()) { |
1046 | if (splitAddress(address: addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) { |
1047 | normalizedAddressList << normalizedAddress(displayName: QString::fromUtf8(ba: displayName), addrSpec: toIdn(addrSpec: QString::fromUtf8(ba: addrSpec)), comment: QString::fromUtf8(ba: comment)); |
1048 | } |
1049 | } |
1050 | } |
1051 | |
1052 | /* |
1053 | qCDebug(KCODECS_LOG) << "normalizedAddressList: \"" |
1054 | << normalizedAddressList.join( ", " ) |
1055 | << "\""; |
1056 | */ |
1057 | return normalizedAddressList.join(QStringLiteral(", " )); |
1058 | } |
1059 | |
1060 | //----------------------------------------------------------------------------- |
1061 | // Escapes unescaped doublequotes in str. |
1062 | static QString escapeQuotes(const QString &str) |
1063 | { |
1064 | if (str.isEmpty()) { |
1065 | return QString(); |
1066 | } |
1067 | |
1068 | QString escaped; |
1069 | // reserve enough memory for the worst case ( """..."" -> \"\"\"...\"\" ) |
1070 | escaped.reserve(asize: 2 * str.length()); |
1071 | unsigned int len = 0; |
1072 | for (int i = 0, total = str.length(); i < total; ++i, ++len) { |
1073 | const QChar &c = str[i]; |
1074 | if (c == QLatin1Char('"')) { // unescaped doublequote |
1075 | escaped.append(c: QLatin1Char('\\')); |
1076 | ++len; |
1077 | } else if (c == QLatin1Char('\\')) { // escaped character |
1078 | escaped.append(c: QLatin1Char('\\')); |
1079 | ++len; |
1080 | ++i; |
1081 | if (i >= str.length()) { // handle trailing '\' gracefully |
1082 | break; |
1083 | } |
1084 | } |
1085 | // Keep str[i] as we increase i previously |
1086 | escaped.append(c: str[i]); |
1087 | } |
1088 | escaped.truncate(pos: len); |
1089 | return escaped; |
1090 | } |
1091 | |
1092 | //----------------------------------------------------------------------------- |
1093 | QString KEmailAddress::quoteNameIfNecessary(const QString &str) |
1094 | { |
1095 | if (str.isEmpty()) { |
1096 | return str; |
1097 | } |
1098 | QString quoted = str; |
1099 | |
1100 | static const QRegularExpression needQuotes(QStringLiteral("[^ 0-9A-Za-z\\x{0080}-\\x{FFFF}]" )); |
1101 | // avoid double quoting |
1102 | if ((quoted[0] == QLatin1Char('"')) && (quoted[quoted.length() - 1] == QLatin1Char('"'))) { |
1103 | quoted = QLatin1String("\"" ) + escapeQuotes(str: quoted.mid(position: 1, n: quoted.length() - 2)) + QLatin1String("\"" ); |
1104 | } else if (quoted.indexOf(re: needQuotes) != -1) { |
1105 | quoted = QLatin1String("\"" ) + escapeQuotes(str: quoted) + QLatin1String("\"" ); |
1106 | } |
1107 | |
1108 | return quoted; |
1109 | } |
1110 | |
1111 | QUrl KEmailAddress::encodeMailtoUrl(const QString &mailbox) |
1112 | { |
1113 | const QByteArray encodedPath = KCodecs::encodeRFC2047String(src: mailbox, charset: "utf-8" ); |
1114 | QUrl mailtoUrl; |
1115 | mailtoUrl.setScheme(QStringLiteral("mailto" )); |
1116 | mailtoUrl.setPath(path: QLatin1String(encodedPath)); |
1117 | return mailtoUrl; |
1118 | } |
1119 | |
1120 | QString KEmailAddress::decodeMailtoUrl(const QUrl &mailtoUrl) |
1121 | { |
1122 | Q_ASSERT(mailtoUrl.scheme() == QLatin1String("mailto" )); |
1123 | return KCodecs::decodeRFC2047String(text: mailtoUrl.path()); |
1124 | } |
1125 | |