1 | /* |
2 | This file is part of the syndication library |
3 | SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | #include "tools.h" |
9 | #include "personimpl.h" |
10 | |
11 | #include <KCharsets> |
12 | |
13 | #include <QByteArray> |
14 | #include <QCryptographicHash> |
15 | #include <QDateTime> |
16 | #include <QRegularExpression> |
17 | #include <QTimeZone> |
18 | |
19 | #include <ctime> |
20 | |
21 | namespace Syndication |
22 | { |
23 | QCryptographicHash md5Machine(QCryptographicHash::Md5); |
24 | |
25 | unsigned int calcHash(const QString &str) |
26 | { |
27 | return calcHash(array: str.toUtf8()); |
28 | } |
29 | |
30 | unsigned int calcHash(const QByteArray &array) |
31 | { |
32 | if (array.isEmpty()) { |
33 | return 0; |
34 | } else { |
35 | const char *s = array.data(); |
36 | unsigned int hash = 5381; |
37 | int c; |
38 | while ((c = *s++)) { |
39 | hash = ((hash << 5) + hash) + c; // hash*33 + c |
40 | } |
41 | return hash; |
42 | } |
43 | } |
44 | |
45 | static uint toTimeT(QDateTime &kdt) |
46 | { |
47 | if (kdt.isValid()) { |
48 | // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC |
49 | if (kdt.time().isNull() // |
50 | || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) { |
51 | kdt.setTimeZone(toZone: QTimeZone::utc()); |
52 | kdt.setTime(time: QTime(12, 0)); |
53 | } |
54 | return kdt.toMSecsSinceEpoch() / 1000; |
55 | } else { |
56 | return 0; |
57 | } |
58 | } |
59 | |
60 | uint parseISODate(const QString &str) |
61 | { |
62 | QDateTime kdt = QDateTime::fromString(string: str, format: Qt::ISODate); |
63 | return toTimeT(kdt); |
64 | } |
65 | |
66 | uint parseRFCDate(const QString &str) |
67 | { |
68 | QDateTime kdt = QDateTime::fromString(string: str, format: Qt::RFC2822Date); |
69 | // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly |
70 | if (!kdt.isValid()) { |
71 | kdt = QDateTime::fromString(string: QStringView(str).chopped(n: 4), format: Qt::RFC2822Date); |
72 | } |
73 | return toTimeT(kdt); |
74 | } |
75 | |
76 | uint parseDate(const QString &str, DateFormat hint) |
77 | { |
78 | if (str.isEmpty()) { |
79 | return 0; |
80 | } |
81 | |
82 | if (hint == RFCDate) { |
83 | time_t t = parseRFCDate(str); |
84 | return t != 0 ? t : parseISODate(str); |
85 | } else { |
86 | time_t t = parseISODate(str); |
87 | return t != 0 ? t : parseRFCDate(str); |
88 | } |
89 | } |
90 | |
91 | QString dateTimeToString(uint date) |
92 | { |
93 | if (date == 0) { |
94 | return QString(); |
95 | } |
96 | |
97 | const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy" ); |
98 | QDateTime dt; |
99 | dt.setMSecsSinceEpoch(quint64(date) * 1000); |
100 | return dt.toUTC().toString(format); |
101 | } |
102 | |
103 | QString calcMD5Sum(const QString &str) |
104 | { |
105 | md5Machine.reset(); |
106 | md5Machine.addData(data: str.toUtf8()); |
107 | return QLatin1String(md5Machine.result().toHex().constData()); |
108 | } |
109 | |
110 | QString resolveEntities(const QString &str) |
111 | { |
112 | return KCharsets::resolveEntities(text: str); |
113 | } |
114 | |
115 | QString escapeSpecialCharacters(const QString &strp) |
116 | { |
117 | QString str(strp); |
118 | str.replace(c: QLatin1Char('&'), after: QLatin1String("&" )); |
119 | str.replace(c: QLatin1Char('\"'), after: QLatin1String(""" )); |
120 | str.replace(c: QLatin1Char('<'), after: QLatin1String("<" )); |
121 | str.replace(c: QLatin1Char('>'), after: QLatin1String(">" )); |
122 | str.replace(c: QLatin1Char('\''), after: QLatin1String("'" )); |
123 | return str.trimmed(); |
124 | } |
125 | |
126 | QString convertNewlines(const QString &strp) |
127 | { |
128 | QString str(strp); |
129 | str.replace(c: QLatin1Char('\n'), after: QLatin1String("<br/>" )); |
130 | return str; |
131 | } |
132 | |
133 | QString plainTextToHtml(const QString &plainText) |
134 | { |
135 | QString str(plainText); |
136 | str.replace(c: QLatin1Char('&'), after: QLatin1String("&" )); |
137 | str.replace(c: QLatin1Char('\"'), after: QLatin1String(""" )); |
138 | str.replace(c: QLatin1Char('<'), after: QLatin1String("<" )); |
139 | // str.replace(QLatin1Char('>'), QLatin1String(">")); |
140 | str.replace(c: QLatin1Char('\n'), after: QLatin1String("<br/>" )); |
141 | return str.trimmed(); |
142 | } |
143 | |
144 | QString htmlToPlainText(const QString &html) |
145 | { |
146 | QString str(html); |
147 | // TODO: preserve some formatting, such as line breaks |
148 | str.remove(re: QRegularExpression(QStringLiteral("<[^>]*?>" ))); // remove tags |
149 | str = resolveEntities(str); |
150 | return str.trimmed(); |
151 | } |
152 | |
153 | static QRegularExpression tagRegExp() |
154 | { |
155 | static QRegularExpression exp(QStringLiteral("<\\w+.*/?>" )); |
156 | return exp; |
157 | } |
158 | |
159 | bool stringContainsMarkup(const QString &str) |
160 | { |
161 | // check for entities |
162 | if (str.contains(re: QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;" )))) { |
163 | return true; |
164 | } |
165 | |
166 | const int ltc = str.count(c: QLatin1Char('<')); |
167 | if (ltc == 0) { |
168 | return false; |
169 | } |
170 | |
171 | return str.contains(re: tagRegExp()); |
172 | } |
173 | |
174 | bool isHtml(const QString &str) |
175 | { |
176 | // check for entities |
177 | if (str.contains(re: QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;" )))) { |
178 | return true; |
179 | } |
180 | |
181 | const int ltc = str.count(c: QLatin1Char('<')); |
182 | if (ltc == 0) { |
183 | return false; |
184 | } |
185 | |
186 | return str.contains(re: tagRegExp()); |
187 | } |
188 | |
189 | QString normalize(const QString &str) |
190 | { |
191 | return isHtml(str) ? str.trimmed() : plainTextToHtml(plainText: str); |
192 | } |
193 | |
194 | QString normalize(const QString &strp, bool isCDATA, bool containsMarkup) |
195 | { |
196 | if (containsMarkup) { |
197 | return strp.trimmed(); |
198 | } else { |
199 | if (isCDATA) { |
200 | QString str = resolveEntities(str: strp); |
201 | str = escapeSpecialCharacters(strp: str); |
202 | str = convertNewlines(strp: str); |
203 | str = str.trimmed(); |
204 | return str; |
205 | } else { |
206 | QString str = escapeSpecialCharacters(strp); |
207 | str = str.trimmed(); |
208 | return str; |
209 | } |
210 | } |
211 | } |
212 | |
213 | PersonPtr personFromString(const QString &strp) |
214 | { |
215 | QString str = strp.trimmed(); |
216 | if (str.isEmpty()) { |
217 | return PersonPtr(new PersonImpl()); |
218 | } |
219 | |
220 | str = resolveEntities(str); |
221 | QString name; |
222 | QString uri; |
223 | QString email; |
224 | |
225 | // look for something looking like a mail address ("foo@bar.com", |
226 | // "<foo@bar.com>") and extract it |
227 | |
228 | const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?" )); // FIXME: user "proper" regexp, |
229 | // search kmail source for it |
230 | |
231 | QRegularExpressionMatch match = remail.match(subject: str); |
232 | if (match.hasMatch()) { |
233 | const QString all = match.captured(nth: 0); |
234 | email = match.captured(nth: 1); |
235 | str.remove(s: all); // remove mail address |
236 | } |
237 | |
238 | // replace "mailto", "(", ")" (to be extended) |
239 | email.remove(QStringLiteral("mailto:" )); |
240 | email.remove(re: QRegularExpression(QStringLiteral("[()]" ))); |
241 | |
242 | // simplify the rest and use it as name |
243 | |
244 | name = str.simplified(); |
245 | |
246 | // after removing the email, str might have |
247 | // the format "(Foo M. Bar)". We cut off |
248 | // parentheses if there are any. However, if |
249 | // str is of the format "Foo M. Bar (President)", |
250 | // we should not cut anything. |
251 | |
252 | QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)" ))); |
253 | match = rename.match(subject: name); |
254 | if (match.hasMatch()) { |
255 | name = match.captured(nth: 1); |
256 | } |
257 | |
258 | name = name.isEmpty() ? QString() : name; |
259 | email = email.isEmpty() ? QString() : email; |
260 | uri = uri.isEmpty() ? QString() : uri; |
261 | |
262 | if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) { |
263 | return PersonPtr(new PersonImpl()); |
264 | } |
265 | |
266 | return PersonPtr(new PersonImpl(name, uri, email)); |
267 | } |
268 | |
269 | ElementType::ElementType(const QString &localnamep, const QString &nsp) |
270 | : ns(nsp) |
271 | , localname(localnamep) |
272 | { |
273 | } |
274 | |
275 | bool ElementType::operator==(const ElementType &other) const |
276 | { |
277 | return localname == other.localname && ns == other.ns; |
278 | } |
279 | |
280 | } // namespace Syndication |
281 | |