1 | /* |
2 | SPDX-FileCopyrightText: 2004 Matt Douhan <matt@fruitsalad.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "kemailaddress.h" |
8 | #include "kcodecs.h" |
9 | #include "kcodecs_debug.h" |
10 | |
11 | #include <QRegularExpression> |
12 | |
13 | using namespace KEmailAddress; |
14 | |
15 | //----------------------------------------------------------------------------- |
16 | QStringList KEmailAddress::splitAddressList(const QString &aStr) |
17 | { |
18 | // Features: |
19 | // - always ignores quoted characters |
20 | // - ignores everything (including parentheses and commas) |
21 | // inside quoted strings |
22 | // - supports nested comments |
23 | // - ignores everything (including double quotes and commas) |
24 | // inside comments |
25 | |
26 | QStringList list; |
27 | |
28 | if (aStr.isEmpty()) { |
29 | return list; |
30 | } |
31 | |
32 | QString addr; |
33 | uint addrstart = 0; |
34 | int = 0; |
35 | bool insidequote = false; |
36 | |
37 | for (int index = 0; index < aStr.length(); index++) { |
38 | // the following conversion to latin1 is o.k. because |
39 | // we can safely ignore all non-latin1 characters |
40 | switch (aStr[index].toLatin1()) { |
41 | case '"': // start or end of quoted string |
42 | if (commentlevel == 0) { |
43 | insidequote = !insidequote; |
44 | } |
45 | break; |
46 | case '(': // start of comment |
47 | if (!insidequote) { |
48 | ++commentlevel; |
49 | } |
50 | break; |
51 | case ')': // end of comment |
52 | if (!insidequote) { |
53 | if (commentlevel > 0) { |
54 | --commentlevel; |
55 | } else { |
56 | return list; |
57 | } |
58 | } |
59 | break; |
60 | case '\\': // quoted character |
61 | index++; // ignore the quoted character |
62 | break; |
63 | case ',': |
64 | case ';': |
65 | if (!insidequote && (commentlevel == 0)) { |
66 | addr = aStr.mid(position: addrstart, n: index - addrstart); |
67 | if (!addr.isEmpty()) { |
68 | list += addr.trimmed(); |
69 | } |
70 | addrstart = index + 1; |
71 | } |
72 | break; |
73 | } |
74 | } |
75 | // append the last address to the list |
76 | if (!insidequote && (commentlevel == 0)) { |
77 | addr = aStr.mid(position: addrstart, n: aStr.length() - addrstart); |
78 | if (!addr.isEmpty()) { |
79 | list += addr.trimmed(); |
80 | } |
81 | } |
82 | |
83 | return list; |
84 | } |
85 | |
86 | //----------------------------------------------------------------------------- |
87 | // Used by KEmailAddress::splitAddress(...) and KEmailAddress::firstEmailAddress(...). |
88 | KEmailAddress::EmailParseResult |
89 | splitAddressInternal(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &, bool allowMultipleAddresses) |
90 | { |
91 | // qCDebug(KCODECS_LOG) << "address"; |
92 | displayName = "" ; |
93 | addrSpec = "" ; |
94 | comment = "" ; |
95 | |
96 | if (address.isEmpty()) { |
97 | return AddressEmpty; |
98 | } |
99 | |
100 | // The following is a primitive parser for a mailbox-list (cf. RFC 2822). |
101 | // The purpose is to extract a displayable string from the mailboxes. |
102 | // Comments in the addr-spec are not handled. No error checking is done. |
103 | |
104 | enum { |
105 | TopLevel, |
106 | , |
107 | InAngleAddress, |
108 | } context = TopLevel; |
109 | bool inQuotedString = false; |
110 | int = 0; |
111 | bool stop = false; |
112 | |
113 | for (const char *p = address.data(); *p && !stop; ++p) { |
114 | switch (context) { |
115 | case TopLevel: { |
116 | switch (*p) { |
117 | case '"': |
118 | inQuotedString = !inQuotedString; |
119 | displayName += *p; |
120 | break; |
121 | case '(': |
122 | if (!inQuotedString) { |
123 | context = InComment; |
124 | commentLevel = 1; |
125 | } else { |
126 | displayName += *p; |
127 | } |
128 | break; |
129 | case '<': |
130 | if (!inQuotedString) { |
131 | context = InAngleAddress; |
132 | } else { |
133 | displayName += *p; |
134 | } |
135 | break; |
136 | case '\\': // quoted character |
137 | displayName += *p; |
138 | ++p; // skip the '\' |
139 | if (*p) { |
140 | displayName += *p; |
141 | } else { |
142 | return UnexpectedEnd; |
143 | } |
144 | break; |
145 | case ',': |
146 | if (!inQuotedString) { |
147 | if (allowMultipleAddresses) { |
148 | stop = true; |
149 | } else { |
150 | return UnexpectedComma; |
151 | } |
152 | } else { |
153 | displayName += *p; |
154 | } |
155 | break; |
156 | default: |
157 | displayName += *p; |
158 | } |
159 | break; |
160 | } |
161 | case InComment: { |
162 | switch (*p) { |
163 | case '(': |
164 | ++commentLevel; |
165 | comment += *p; |
166 | break; |
167 | case ')': |
168 | --commentLevel; |
169 | if (commentLevel == 0) { |
170 | context = TopLevel; |
171 | comment += ' '; // separate the text of several comments |
172 | } else { |
173 | comment += *p; |
174 | } |
175 | break; |
176 | case '\\': // quoted character |
177 | comment += *p; |
178 | ++p; // skip the '\' |
179 | if (*p) { |
180 | comment += *p; |
181 | } else { |
182 | return UnexpectedEnd; |
183 | } |
184 | break; |
185 | default: |
186 | comment += *p; |
187 | } |
188 | break; |
189 | } |
190 | case InAngleAddress: { |
191 | switch (*p) { |
192 | case '"': |
193 | inQuotedString = !inQuotedString; |
194 | addrSpec += *p; |
195 | break; |
196 | case '>': |
197 | if (!inQuotedString) { |
198 | context = TopLevel; |
199 | } else { |
200 | addrSpec += *p; |
201 | } |
202 | break; |
203 | case '\\': // quoted character |
204 | addrSpec += *p; |
205 | ++p; // skip the '\' |
206 | if (*p) { |
207 | addrSpec += *p; |
208 | } else { |
209 | return UnexpectedEnd; |
210 | } |
211 | break; |
212 | default: |
213 | addrSpec += *p; |
214 | } |
215 | break; |
216 | } |
217 | } // switch ( context ) |
218 | } |
219 | // check for errors |
220 | if (inQuotedString) { |
221 | return UnbalancedQuote; |
222 | } |
223 | if (context == InComment) { |
224 | return UnbalancedParens; |
225 | } |
226 | if (context == InAngleAddress) { |
227 | return UnclosedAngleAddr; |
228 | } |
229 | |
230 | displayName = displayName.trimmed(); |
231 | comment = comment.trimmed(); |
232 | addrSpec = addrSpec.trimmed(); |
233 | |
234 | if (addrSpec.isEmpty()) { |
235 | if (displayName.isEmpty()) { |
236 | return NoAddressSpec; |
237 | } else { |
238 | addrSpec = displayName; |
239 | displayName.truncate(pos: 0); |
240 | } |
241 | } |
242 | /* |
243 | qCDebug(KCODECS_LOG) << "display-name : \"" << displayName << "\""; |
244 | qCDebug(KCODECS_LOG) << "comment : \"" << comment << "\""; |
245 | qCDebug(KCODECS_LOG) << "addr-spec : \"" << addrSpec << "\""; |
246 | */ |
247 | return AddressOk; |
248 | } |
249 | |
250 | //----------------------------------------------------------------------------- |
251 | EmailParseResult KEmailAddress::splitAddress(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &) |
252 | { |
253 | return splitAddressInternal(address, displayName, addrSpec, comment, allowMultipleAddresses: false /* don't allow multiple addresses */); |
254 | } |
255 | |
256 | //----------------------------------------------------------------------------- |
257 | EmailParseResult KEmailAddress::splitAddress(const QString &address, QString &displayName, QString &addrSpec, QString &) |
258 | { |
259 | QByteArray d; |
260 | QByteArray a; |
261 | QByteArray c; |
262 | // FIXME: toUtf8() is probably not safe here, what if the second byte of a multi-byte character |
263 | // has the same code as one of the ASCII characters that splitAddress uses as delimiters? |
264 | EmailParseResult result = splitAddress(address: address.toUtf8(), displayName&: d, addrSpec&: a, comment&: c); |
265 | |
266 | if (result == AddressOk) { |
267 | displayName = QString::fromUtf8(ba: d); |
268 | addrSpec = QString::fromUtf8(ba: a); |
269 | comment = QString::fromUtf8(ba: c); |
270 | } |
271 | return result; |
272 | } |
273 | |
274 | //----------------------------------------------------------------------------- |
275 | EmailParseResult KEmailAddress::isValidAddress(const QString &aStr) |
276 | { |
277 | // If we are passed an empty string bail right away no need to process |
278 | // further and waste resources |
279 | if (aStr.isEmpty()) { |
280 | return AddressEmpty; |
281 | } |
282 | |
283 | // count how many @'s are in the string that is passed to us |
284 | // if 0 or > 1 take action |
285 | // at this point to many @'s cannot bail out right away since |
286 | // @ is allowed in quotes, so we use a bool to keep track |
287 | // and then make a judgment further down in the parser |
288 | |
289 | bool tooManyAtsFlag = false; |
290 | |
291 | int atCount = aStr.count(c: QLatin1Char('@')); |
292 | if (atCount > 1) { |
293 | tooManyAtsFlag = true; |
294 | } else if (atCount == 0) { |
295 | return TooFewAts; |
296 | } |
297 | |
298 | int dotCount = aStr.count(c: QLatin1Char('.')); |
299 | |
300 | // The main parser, try and catch all weird and wonderful |
301 | // mistakes users and/or machines can create |
302 | |
303 | enum { |
304 | TopLevel, |
305 | , |
306 | InAngleAddress, |
307 | } context = TopLevel; |
308 | bool inQuotedString = false; |
309 | int = 0; |
310 | |
311 | unsigned int strlen = aStr.length(); |
312 | |
313 | for (unsigned int index = 0; index < strlen; index++) { |
314 | switch (context) { |
315 | case TopLevel: { |
316 | switch (aStr[index].toLatin1()) { |
317 | case '"': |
318 | inQuotedString = !inQuotedString; |
319 | break; |
320 | case '(': |
321 | if (!inQuotedString) { |
322 | context = InComment; |
323 | commentLevel = 1; |
324 | } |
325 | break; |
326 | case '[': |
327 | if (!inQuotedString) { |
328 | return InvalidDisplayName; |
329 | } |
330 | break; |
331 | case ']': |
332 | if (!inQuotedString) { |
333 | return InvalidDisplayName; |
334 | } |
335 | break; |
336 | case ':': |
337 | if (!inQuotedString) { |
338 | return DisallowedChar; |
339 | } |
340 | break; |
341 | case '<': |
342 | if (!inQuotedString) { |
343 | context = InAngleAddress; |
344 | } |
345 | break; |
346 | case '\\': // quoted character |
347 | ++index; // skip the '\' |
348 | if ((index + 1) > strlen) { |
349 | return UnexpectedEnd; |
350 | } |
351 | break; |
352 | case ',': |
353 | if (!inQuotedString) { |
354 | return UnexpectedComma; |
355 | } |
356 | break; |
357 | case ')': |
358 | if (!inQuotedString) { |
359 | return UnbalancedParens; |
360 | } |
361 | break; |
362 | case '>': |
363 | if (!inQuotedString) { |
364 | return UnopenedAngleAddr; |
365 | } |
366 | break; |
367 | case '@': |
368 | if (!inQuotedString) { |
369 | if (index == 0) { // Missing local part |
370 | return MissingLocalPart; |
371 | } else if (index == strlen - 1) { |
372 | return MissingDomainPart; |
373 | } |
374 | } else { |
375 | --atCount; |
376 | if (atCount == 1) { |
377 | tooManyAtsFlag = false; |
378 | } |
379 | } |
380 | break; |
381 | case '.': |
382 | if (inQuotedString) { |
383 | --dotCount; |
384 | } |
385 | break; |
386 | } |
387 | break; |
388 | } |
389 | case InComment: { |
390 | switch (aStr[index].toLatin1()) { |
391 | case '(': |
392 | ++commentLevel; |
393 | break; |
394 | case ')': |
395 | --commentLevel; |
396 | if (commentLevel == 0) { |
397 | context = TopLevel; |
398 | } |
399 | break; |
400 | case '\\': // quoted character |
401 | ++index; // skip the '\' |
402 | if ((index + 1) > strlen) { |
403 | return UnexpectedEnd; |
404 | } |
405 | break; |
406 | } |
407 | break; |
408 | } |
409 | |
410 | case InAngleAddress: { |
411 | switch (aStr[index].toLatin1()) { |
412 | case ',': |
413 | if (!inQuotedString) { |
414 | return UnexpectedComma; |
415 | } |
416 | break; |
417 | case '"': |
418 | inQuotedString = !inQuotedString; |
419 | break; |
420 | case '@': |
421 | if (inQuotedString) { |
422 | --atCount; |
423 | } |
424 | if (atCount == 1) { |
425 | tooManyAtsFlag = false; |
426 | } |
427 | break; |
428 | case '.': |
429 | if (inQuotedString) { |
430 | --dotCount; |
431 | } |
432 | break; |
433 | case '>': |
434 | if (!inQuotedString) { |
435 | context = TopLevel; |
436 | break; |
437 | } |
438 | break; |
439 | case '\\': // quoted character |
440 | ++index; // skip the '\' |
441 | if ((index + 1) > strlen) { |
442 | return UnexpectedEnd; |
443 | } |
444 | break; |
445 | } |
446 | break; |
447 | } |
448 | } |
449 | } |
450 | |
451 | if (dotCount == 0 && !inQuotedString) { |
452 | return TooFewDots; |
453 | } |
454 | |
455 | if (atCount == 0 && !inQuotedString) { |
456 | return TooFewAts; |
457 | } |
458 | |
459 | if (inQuotedString) { |
460 | return UnbalancedQuote; |
461 | } |
462 | |
463 | if (context == InComment) { |
464 | return UnbalancedParens; |
465 | } |
466 | |
467 | if (context == InAngleAddress) { |
468 | return UnclosedAngleAddr; |
469 | } |
470 | |
471 | if (tooManyAtsFlag) { |
472 | return TooManyAts; |
473 | } |
474 | |
475 | return AddressOk; |
476 | } |
477 | |
478 | //----------------------------------------------------------------------------- |
479 | KEmailAddress::EmailParseResult KEmailAddress::isValidAddressList(const QString &aStr, QString &badAddr) |
480 | { |
481 | if (aStr.isEmpty()) { |
482 | return AddressEmpty; |
483 | } |
484 | |
485 | const QStringList list = splitAddressList(aStr); |
486 | EmailParseResult errorCode = AddressOk; |
487 | auto it = std::find_if(first: list.cbegin(), last: list.cend(), pred: [&errorCode](const QString &addr) { |
488 | qCDebug(KCODECS_LOG) << " address" << addr; |
489 | errorCode = isValidAddress(aStr: addr); |
490 | return errorCode != AddressOk; |
491 | }); |
492 | if (it != list.cend()) { |
493 | badAddr = *it; |
494 | } |
495 | return errorCode; |
496 | } |
497 | |
498 | //----------------------------------------------------------------------------- |
499 | QString KEmailAddress::emailParseResultToString(EmailParseResult errorCode) |
500 | { |
501 | switch (errorCode) { |
502 | case TooManyAts: |
503 | return QObject::tr( |
504 | s: "The email address you entered is not valid because it " |
505 | "contains more than one @.\n" |
506 | "You will not create valid messages if you do not " |
507 | "change your address." ); |
508 | case TooFewAts: |
509 | return QObject::tr( |
510 | s: "The email address you entered is not valid because it " |
511 | "does not contain a @.\n" |
512 | "You will not create valid messages if you do not " |
513 | "change your address." ); |
514 | case AddressEmpty: |
515 | return QObject::tr(s: "You have to enter something in the email address field." ); |
516 | case MissingLocalPart: |
517 | return QObject::tr( |
518 | s: "The email address you entered is not valid because it " |
519 | "does not contain a local part." ); |
520 | case MissingDomainPart: |
521 | return QObject::tr( |
522 | s: "The email address you entered is not valid because it " |
523 | "does not contain a domain part." ); |
524 | case UnbalancedParens: |
525 | return QObject::tr( |
526 | s: "The email address you entered is not valid because it " |
527 | "contains unclosed comments/brackets." ); |
528 | case AddressOk: |
529 | return QObject::tr(s: "The email address you entered is valid." ); |
530 | case UnclosedAngleAddr: |
531 | return QObject::tr( |
532 | s: "The email address you entered is not valid because it " |
533 | "contains an unclosed angle bracket." ); |
534 | case UnopenedAngleAddr: |
535 | return QObject::tr( |
536 | s: "The email address you entered is not valid because it " |
537 | "contains too many closing angle brackets." ); |
538 | case UnexpectedComma: |
539 | return QObject::tr( |
540 | s: "The email address you have entered is not valid because it " |
541 | "contains an unexpected comma." ); |
542 | case UnexpectedEnd: |
543 | return QObject::tr( |
544 | s: "The email address you entered is not valid because it ended " |
545 | "unexpectedly.\nThis probably means you have used an escaping " |
546 | "type character like a '\\' as the last character in your " |
547 | "email address." ); |
548 | case UnbalancedQuote: |
549 | return QObject::tr( |
550 | s: "The email address you entered is not valid because it " |
551 | "contains quoted text which does not end." ); |
552 | case NoAddressSpec: |
553 | return QObject::tr( |
554 | s: "The email address you entered is not valid because it " |
555 | "does not seem to contain an actual email address, i.e. " |
556 | "something of the form joe@example.org." ); |
557 | case DisallowedChar: |
558 | return QObject::tr( |
559 | s: "The email address you entered is not valid because it " |
560 | "contains an illegal character." ); |
561 | case InvalidDisplayName: |
562 | return QObject::tr( |
563 | s: "The email address you have entered is not valid because it " |
564 | "contains an invalid display name." ); |
565 | case TooFewDots: |
566 | return QObject::tr( |
567 | s: "The email address you entered is not valid because it " |
568 | "does not contain a \'.\'.\n" |
569 | "You will not create valid messages if you do not " |
570 | "change your address." ); |
571 | } |
572 | return QObject::tr(s: "Unknown problem with email address" ); |
573 | } |
574 | |
575 | //----------------------------------------------------------------------------- |
576 | bool KEmailAddress::isValidSimpleAddress(const QString &aStr) |
577 | { |
578 | // If we are passed an empty string bail right away no need to process further |
579 | // and waste resources |
580 | if (aStr.isEmpty()) { |
581 | return false; |
582 | } |
583 | |
584 | int atChar = aStr.lastIndexOf(c: QLatin1Char('@')); |
585 | QString domainPart = aStr.mid(position: atChar + 1); |
586 | QString localPart = aStr.left(n: atChar); |
587 | |
588 | // Both of these parts must be non empty |
589 | // after all we cannot have emails like: |
590 | // @kde.org, or foo@ |
591 | if (localPart.isEmpty() || domainPart.isEmpty()) { |
592 | return false; |
593 | } |
594 | |
595 | bool inQuotedString = false; |
596 | int atCount = localPart.count(c: QLatin1Char('@')); |
597 | |
598 | unsigned int strlen = localPart.length(); |
599 | for (unsigned int index = 0; index < strlen; index++) { |
600 | switch (localPart[index].toLatin1()) { |
601 | case '"': |
602 | inQuotedString = !inQuotedString; |
603 | break; |
604 | case '@': |
605 | if (inQuotedString) { |
606 | --atCount; |
607 | } |
608 | break; |
609 | } |
610 | } |
611 | |
612 | QString addrRx; |
613 | |
614 | if (localPart[0] == QLatin1Char('\"') || localPart[localPart.length() - 1] == QLatin1Char('\"')) { |
615 | addrRx = QStringLiteral("\"[a-zA-Z@]*[\\w.@-]*[a-zA-Z0-9@]\"@" ); |
616 | } else { |
617 | addrRx = QStringLiteral("[a-zA-Z]*[~|{}`\\^?=/+*'&%$#!_\\w.-]*[~|{}`\\^?=/+*'&%$#!_a-zA-Z0-9-]@" ); |
618 | } |
619 | if (domainPart[0] == QLatin1Char('[') || domainPart[domainPart.length() - 1] == QLatin1Char(']')) { |
620 | addrRx += QStringLiteral("\\[[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]" ); |
621 | } else { |
622 | addrRx += QStringLiteral("[\\w#-]+(\\.[\\w#-]+)*" ); |
623 | } |
624 | |
625 | const QRegularExpression rx(QRegularExpression::anchoredPattern(expression: addrRx), QRegularExpression::UseUnicodePropertiesOption); |
626 | return rx.match(subject: aStr).hasMatch(); |
627 | } |
628 | |
629 | //----------------------------------------------------------------------------- |
630 | QString KEmailAddress::simpleEmailAddressErrorMsg() |
631 | { |
632 | return QObject::tr( |
633 | s: "The email address you entered is not valid.\nIt " |
634 | "does not seem to contain an actual email address, i.e. " |
635 | "something of the form joe@example.org." ); |
636 | } |
637 | |
638 | //----------------------------------------------------------------------------- |
639 | QByteArray KEmailAddress::(const QByteArray &address) |
640 | { |
641 | QString errorMessage; |
642 | return extractEmailAddress(address, errorMessage); |
643 | } |
644 | |
645 | QByteArray KEmailAddress::(const QByteArray &address, QString &errorMessage) |
646 | { |
647 | QByteArray dummy1; |
648 | QByteArray dummy2; |
649 | QByteArray addrSpec; |
650 | const EmailParseResult result = splitAddressInternal(address, displayName&: dummy1, addrSpec, comment&: dummy2, allowMultipleAddresses: false /* don't allow multiple addresses */); |
651 | if (result != AddressOk) { |
652 | addrSpec = QByteArray(); |
653 | if (result != AddressEmpty) { |
654 | errorMessage = emailParseResultToString(errorCode: result); |
655 | qCDebug(KCODECS_LOG) << "Input:" << address << "\nError:" << errorMessage; |
656 | } |
657 | } else { |
658 | errorMessage.clear(); |
659 | } |
660 | |
661 | return addrSpec; |
662 | } |
663 | |
664 | //----------------------------------------------------------------------------- |
665 | QString KEmailAddress::(const QString &address) |
666 | { |
667 | QString errorMessage; |
668 | return extractEmailAddress(address, errorMessage); |
669 | } |
670 | |
671 | QString KEmailAddress::(const QString &address, QString &errorMessage) |
672 | { |
673 | return QString::fromUtf8(ba: extractEmailAddress(address: address.toUtf8(), errorMessage)); |
674 | } |
675 | |
676 | //----------------------------------------------------------------------------- |
677 | QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses) |
678 | { |
679 | QString errorMessage; |
680 | return firstEmailAddress(addresses, errorMessage); |
681 | } |
682 | |
683 | QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses, QString &errorMessage) |
684 | { |
685 | QByteArray dummy1; |
686 | QByteArray dummy2; |
687 | QByteArray addrSpec; |
688 | const EmailParseResult result = splitAddressInternal(address: addresses, displayName&: dummy1, addrSpec, comment&: dummy2, allowMultipleAddresses: true /* allow multiple addresses */); |
689 | if (result != AddressOk) { |
690 | addrSpec = QByteArray(); |
691 | if (result != AddressEmpty) { |
692 | errorMessage = emailParseResultToString(errorCode: result); |
693 | qCDebug(KCODECS_LOG) << "Input: aStr\nError:" << errorMessage; |
694 | } |
695 | } else { |
696 | errorMessage.clear(); |
697 | } |
698 | |
699 | return addrSpec; |
700 | } |
701 | |
702 | //----------------------------------------------------------------------------- |
703 | QString KEmailAddress::firstEmailAddress(const QString &addresses) |
704 | { |
705 | QString errorMessage; |
706 | return firstEmailAddress(addresses, errorMessage); |
707 | } |
708 | |
709 | QString KEmailAddress::firstEmailAddress(const QString &addresses, QString &errorMessage) |
710 | { |
711 | return QString::fromUtf8(ba: firstEmailAddress(addresses: addresses.toUtf8(), errorMessage)); |
712 | } |
713 | |
714 | //----------------------------------------------------------------------------- |
715 | bool KEmailAddress::extractEmailAddressAndName(const QString &aStr, QString &mail, QString &name) |
716 | { |
717 | name.clear(); |
718 | mail.clear(); |
719 | |
720 | const int len = aStr.length(); |
721 | const char cQuotes = '"'; |
722 | |
723 | bool = false; |
724 | bool bInQuotesOutsideOfEmail = false; |
725 | int i = 0; |
726 | int iAd = 0; |
727 | int iMailStart = 0; |
728 | int iMailEnd = 0; |
729 | QChar c; |
730 | unsigned int = 0; |
731 | |
732 | // Find the '@' of the email address |
733 | // skipping all '@' inside "(...)" comments: |
734 | while (i < len) { |
735 | c = aStr[i]; |
736 | if (QLatin1Char('(') == c) { |
737 | ++commentstack; |
738 | } |
739 | if (QLatin1Char(')') == c) { |
740 | --commentstack; |
741 | } |
742 | bInComment = commentstack != 0; |
743 | if (QLatin1Char('"') == c && !bInComment) { |
744 | bInQuotesOutsideOfEmail = !bInQuotesOutsideOfEmail; |
745 | } |
746 | |
747 | if (!bInComment && !bInQuotesOutsideOfEmail) { |
748 | if (QLatin1Char('@') == c) { |
749 | iAd = i; |
750 | break; // found it |
751 | } |
752 | } |
753 | ++i; |
754 | } |
755 | |
756 | if (!iAd) { |
757 | // We suppose the user is typing the string manually and just |
758 | // has not finished typing the mail address part. |
759 | // So we take everything that's left of the '<' as name and the rest as mail |
760 | for (i = 0; len > i; ++i) { |
761 | c = aStr[i]; |
762 | if (QLatin1Char('<') != c) { |
763 | name.append(c); |
764 | } else { |
765 | break; |
766 | } |
767 | } |
768 | mail = aStr.mid(position: i + 1); |
769 | if (mail.endsWith(c: QLatin1Char('>'))) { |
770 | mail.truncate(pos: mail.length() - 1); |
771 | } |
772 | |
773 | } else { |
774 | // Loop backwards until we find the start of the string |
775 | // or a ',' that is outside of a comment |
776 | // and outside of quoted text before the leading '<'. |
777 | bInComment = false; |
778 | bInQuotesOutsideOfEmail = false; |
779 | for (i = iAd - 1; 0 <= i; --i) { |
780 | c = aStr[i]; |
781 | if (bInComment) { |
782 | if (QLatin1Char('(') == c) { |
783 | if (!name.isEmpty()) { |
784 | name.prepend(c: QLatin1Char(' ')); |
785 | } |
786 | bInComment = false; |
787 | } else { |
788 | name.prepend(c); // all comment stuff is part of the name |
789 | } |
790 | } else if (bInQuotesOutsideOfEmail) { |
791 | if (QLatin1Char(cQuotes) == c) { |
792 | bInQuotesOutsideOfEmail = false; |
793 | } else if (c != QLatin1Char('\\')) { |
794 | name.prepend(c); |
795 | } |
796 | } else { |
797 | // found the start of this addressee ? |
798 | if (QLatin1Char(',') == c) { |
799 | break; |
800 | } |
801 | // stuff is before the leading '<' ? |
802 | if (iMailStart) { |
803 | if (QLatin1Char(cQuotes) == c) { |
804 | bInQuotesOutsideOfEmail = true; // end of quoted text found |
805 | } else { |
806 | name.prepend(c); |
807 | } |
808 | } else { |
809 | switch (c.toLatin1()) { |
810 | case '<': |
811 | iMailStart = i; |
812 | break; |
813 | case ')': |
814 | if (!name.isEmpty()) { |
815 | name.prepend(c: QLatin1Char(' ')); |
816 | } |
817 | bInComment = true; |
818 | break; |
819 | default: |
820 | if (QLatin1Char(' ') != c) { |
821 | mail.prepend(c); |
822 | } |
823 | } |
824 | } |
825 | } |
826 | } |
827 | |
828 | name = name.simplified(); |
829 | mail = mail.simplified(); |
830 | |
831 | if (mail.isEmpty()) { |
832 | return false; |
833 | } |
834 | |
835 | mail.append(c: QLatin1Char('@')); |
836 | |
837 | // Loop forward until we find the end of the string |
838 | // or a ',' that is outside of a comment |
839 | // and outside of quoted text behind the trailing '>'. |
840 | bInComment = false; |
841 | bInQuotesOutsideOfEmail = false; |
842 | int parenthesesNesting = 0; |
843 | for (i = iAd + 1; len > i; ++i) { |
844 | c = aStr[i]; |
845 | if (bInComment) { |
846 | if (QLatin1Char(')') == c) { |
847 | if (--parenthesesNesting == 0) { |
848 | bInComment = false; |
849 | if (!name.isEmpty()) { |
850 | name.append(c: QLatin1Char(' ')); |
851 | } |
852 | } else { |
853 | // nested ")", add it |
854 | name.append(c: QLatin1Char(')')); // name can't be empty here |
855 | } |
856 | } else { |
857 | if (QLatin1Char('(') == c) { |
858 | // nested "(" |
859 | ++parenthesesNesting; |
860 | } |
861 | name.append(c); // all comment stuff is part of the name |
862 | } |
863 | } else if (bInQuotesOutsideOfEmail) { |
864 | if (QLatin1Char(cQuotes) == c) { |
865 | bInQuotesOutsideOfEmail = false; |
866 | } else if (c != QLatin1Char('\\')) { |
867 | name.append(c); |
868 | } |
869 | } else { |
870 | // found the end of this addressee ? |
871 | if (QLatin1Char(',') == c) { |
872 | break; |
873 | } |
874 | // stuff is behind the trailing '>' ? |
875 | if (iMailEnd) { |
876 | if (QLatin1Char(cQuotes) == c) { |
877 | bInQuotesOutsideOfEmail = true; // start of quoted text found |
878 | } else { |
879 | name.append(c); |
880 | } |
881 | } else { |
882 | switch (c.toLatin1()) { |
883 | case '>': |
884 | iMailEnd = i; |
885 | break; |
886 | case '(': |
887 | if (!name.isEmpty()) { |
888 | name.append(c: QLatin1Char(' ')); |
889 | } |
890 | if (++parenthesesNesting > 0) { |
891 | bInComment = true; |
892 | } |
893 | break; |
894 | default: |
895 | if (QLatin1Char(' ') != c) { |
896 | mail.append(c); |
897 | } |
898 | } |
899 | } |
900 | } |
901 | } |
902 | } |
903 | |
904 | name = name.simplified(); |
905 | mail = mail.simplified(); |
906 | |
907 | return !(name.isEmpty() || mail.isEmpty()); |
908 | } |
909 | |
910 | //----------------------------------------------------------------------------- |
911 | bool KEmailAddress::compareEmail(const QString &email1, const QString &email2, bool matchName) |
912 | { |
913 | QString e1Name; |
914 | QString e1Email; |
915 | QString e2Name; |
916 | QString e2Email; |
917 | |
918 | extractEmailAddressAndName(aStr: email1, mail&: e1Email, name&: e1Name); |
919 | extractEmailAddressAndName(aStr: email2, mail&: e2Email, name&: e2Name); |
920 | |
921 | return e1Email == e2Email && (!matchName || (e1Name == e2Name)); |
922 | } |
923 | |
924 | //----------------------------------------------------------------------------- |
925 | // Used internally by normalizedAddress() |
926 | QString removeBidiControlChars(const QString &input) |
927 | { |
928 | constexpr QChar LRO(0x202D); |
929 | constexpr QChar RLO(0x202E); |
930 | constexpr QChar LRE(0x202A); |
931 | constexpr QChar RLE(0x202B); |
932 | QString result = input; |
933 | result.remove(c: LRO); |
934 | result.remove(c: RLO); |
935 | result.remove(c: LRE); |
936 | result.remove(c: RLE); |
937 | return result; |
938 | } |
939 | |
940 | QString KEmailAddress::normalizedAddress(const QString &displayName, const QString &addrSpec, const QString &) |
941 | { |
942 | const QString realDisplayName = removeBidiControlChars(input: displayName); |
943 | if (realDisplayName.isEmpty() && comment.isEmpty()) { |
944 | return addrSpec; |
945 | } else if (comment.isEmpty()) { |
946 | if (!realDisplayName.startsWith(c: QLatin1Char('\"'))) { |
947 | return quoteNameIfNecessary(str: realDisplayName) + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
948 | } else { |
949 | return realDisplayName + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
950 | } |
951 | } else if (realDisplayName.isEmpty()) { |
952 | return quoteNameIfNecessary(str: comment) + QLatin1String(" <" ) + addrSpec + QLatin1Char('>'); |
953 | } else { |
954 | return realDisplayName + QLatin1String(" (" ) + comment + QLatin1String(") <" ) + addrSpec + QLatin1Char('>'); |
955 | } |
956 | } |
957 | |
958 | //----------------------------------------------------------------------------- |
959 | QString KEmailAddress::fromIdn(const QString &addrSpec) |
960 | { |
961 | const int atPos = addrSpec.lastIndexOf(c: QLatin1Char('@')); |
962 | if (atPos == -1) { |
963 | return addrSpec; |
964 | } |
965 | |
966 | QString idn = QUrl::fromAce(domain: addrSpec.mid(position: atPos + 1).toLatin1()); |
967 | if (idn.isEmpty()) { |
968 | return QString(); |
969 | } |
970 | |
971 | return addrSpec.left(n: atPos + 1) + idn; |
972 | } |
973 | |
974 | //----------------------------------------------------------------------------- |
975 | QString KEmailAddress::toIdn(const QString &addrSpec) |
976 | { |
977 | const int atPos = addrSpec.lastIndexOf(c: QLatin1Char('@')); |
978 | if (atPos == -1) { |
979 | return addrSpec; |
980 | } |
981 | |
982 | QString idn = QLatin1String(QUrl::toAce(domain: addrSpec.mid(position: atPos + 1))); |
983 | if (idn.isEmpty()) { |
984 | return addrSpec; |
985 | } |
986 | |
987 | return addrSpec.left(n: atPos + 1) + idn; |
988 | } |
989 | |
990 | //----------------------------------------------------------------------------- |
991 | QString KEmailAddress::normalizeAddressesAndDecodeIdn(const QString &str) |
992 | { |
993 | // qCDebug(KCODECS_LOG) << str; |
994 | if (str.isEmpty()) { |
995 | return str; |
996 | } |
997 | |
998 | const QStringList addressList = splitAddressList(aStr: str); |
999 | QStringList normalizedAddressList; |
1000 | |
1001 | QByteArray displayName; |
1002 | QByteArray addrSpec; |
1003 | QByteArray ; |
1004 | |
1005 | for (const auto &addr : addressList) { |
1006 | if (!addr.isEmpty()) { |
1007 | if (splitAddress(address: addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) { |
1008 | QByteArray cs; |
1009 | displayName = KCodecs::decodeRFC2047String(src: displayName, usedCS: &cs).toUtf8(); |
1010 | comment = KCodecs::decodeRFC2047String(src: comment, usedCS: &cs).toUtf8(); |
1011 | |
1012 | normalizedAddressList << normalizedAddress(displayName: QString::fromUtf8(ba: displayName), addrSpec: fromIdn(addrSpec: QString::fromUtf8(ba: addrSpec)), comment: QString::fromUtf8(ba: comment)); |
1013 | } |
1014 | } |
1015 | } |
1016 | /* |
1017 | qCDebug(KCODECS_LOG) << "normalizedAddressList: \"" |
1018 | << normalizedAddressList.join( ", " ) |
1019 | << "\""; |
1020 | */ |
1021 | return normalizedAddressList.join(QStringLiteral(", " )); |
1022 | } |
1023 | |
1024 | //----------------------------------------------------------------------------- |
1025 | QString KEmailAddress::normalizeAddressesAndEncodeIdn(const QString &str) |
1026 | { |
1027 | // qCDebug(KCODECS_LOG) << str; |
1028 | if (str.isEmpty()) { |
1029 | return str; |
1030 | } |
1031 | |
1032 | const QStringList addressList = splitAddressList(aStr: str); |
1033 | QStringList normalizedAddressList; |
1034 | |
1035 | QByteArray displayName; |
1036 | QByteArray addrSpec; |
1037 | QByteArray ; |
1038 | |
1039 | for (const auto &addr : addressList) { |
1040 | if (!addr.isEmpty()) { |
1041 | if (splitAddress(address: addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) { |
1042 | normalizedAddressList << normalizedAddress(displayName: QString::fromUtf8(ba: displayName), addrSpec: toIdn(addrSpec: QString::fromUtf8(ba: addrSpec)), comment: QString::fromUtf8(ba: comment)); |
1043 | } |
1044 | } |
1045 | } |
1046 | |
1047 | /* |
1048 | qCDebug(KCODECS_LOG) << "normalizedAddressList: \"" |
1049 | << normalizedAddressList.join( ", " ) |
1050 | << "\""; |
1051 | */ |
1052 | return normalizedAddressList.join(QStringLiteral(", " )); |
1053 | } |
1054 | |
1055 | //----------------------------------------------------------------------------- |
1056 | // Escapes unescaped doublequotes in str. |
1057 | static QString escapeQuotes(const QString &str) |
1058 | { |
1059 | if (str.isEmpty()) { |
1060 | return QString(); |
1061 | } |
1062 | |
1063 | QString escaped; |
1064 | // reserve enough memory for the worst case ( """..."" -> \"\"\"...\"\" ) |
1065 | escaped.reserve(asize: 2 * str.length()); |
1066 | unsigned int len = 0; |
1067 | for (int i = 0, total = str.length(); i < total; ++i, ++len) { |
1068 | const QChar &c = str[i]; |
1069 | if (c == QLatin1Char('"')) { // unescaped doublequote |
1070 | escaped.append(c: QLatin1Char('\\')); |
1071 | ++len; |
1072 | } else if (c == QLatin1Char('\\')) { // escaped character |
1073 | escaped.append(c: QLatin1Char('\\')); |
1074 | ++len; |
1075 | ++i; |
1076 | if (i >= str.length()) { // handle trailing '\' gracefully |
1077 | break; |
1078 | } |
1079 | } |
1080 | // Keep str[i] as we increase i previously |
1081 | escaped.append(c: str[i]); |
1082 | } |
1083 | escaped.truncate(pos: len); |
1084 | return escaped; |
1085 | } |
1086 | |
1087 | //----------------------------------------------------------------------------- |
1088 | QString KEmailAddress::quoteNameIfNecessary(const QString &str) |
1089 | { |
1090 | if (str.isEmpty()) { |
1091 | return str; |
1092 | } |
1093 | QString quoted = str; |
1094 | |
1095 | static const QRegularExpression needQuotes(QStringLiteral("[^ 0-9A-Za-z\\x{0080}-\\x{FFFF}]" )); |
1096 | // avoid double quoting |
1097 | if ((quoted[0] == QLatin1Char('"')) && (quoted[quoted.length() - 1] == QLatin1Char('"'))) { |
1098 | quoted = QLatin1String("\"" ) + escapeQuotes(str: quoted.mid(position: 1, n: quoted.length() - 2)) + QLatin1String("\"" ); |
1099 | } else if (quoted.indexOf(re: needQuotes) != -1) { |
1100 | quoted = QLatin1String("\"" ) + escapeQuotes(str: quoted) + QLatin1String("\"" ); |
1101 | } |
1102 | |
1103 | return quoted; |
1104 | } |
1105 | |
1106 | QUrl KEmailAddress::encodeMailtoUrl(const QString &mailbox) |
1107 | { |
1108 | const QByteArray encodedPath = KCodecs::encodeRFC2047String(src: mailbox, charset: "utf-8" ); |
1109 | QUrl mailtoUrl; |
1110 | mailtoUrl.setScheme(QStringLiteral("mailto" )); |
1111 | mailtoUrl.setPath(path: QLatin1String(encodedPath)); |
1112 | return mailtoUrl; |
1113 | } |
1114 | |
1115 | QString KEmailAddress::decodeMailtoUrl(const QUrl &mailtoUrl) |
1116 | { |
1117 | Q_ASSERT(mailtoUrl.scheme() == QLatin1String("mailto" )); |
1118 | return KCodecs::decodeRFC2047String(text: mailtoUrl.path()); |
1119 | } |
1120 | |