1 | /* |
2 | This file is part of the syndication library |
3 | SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | #include "tools.h" |
9 | #include "personimpl.h" |
10 | |
11 | #include <KCharsets> |
12 | |
13 | #include <QByteArray> |
14 | #include <QCryptographicHash> |
15 | #include <QDateTime> |
16 | #include <QRegularExpression> |
17 | |
18 | #include <ctime> |
19 | |
20 | namespace Syndication |
21 | { |
22 | QCryptographicHash md5Machine(QCryptographicHash::Md5); |
23 | |
24 | unsigned int calcHash(const QString &str) |
25 | { |
26 | return calcHash(str.toUtf8()); |
27 | } |
28 | |
29 | unsigned int calcHash(const QByteArray &array) |
30 | { |
31 | if (array.isEmpty()) { |
32 | return 0; |
33 | } else { |
34 | const char *s = array.data(); |
35 | unsigned int hash = 5381; |
36 | int c; |
37 | while ((c = *s++)) { |
38 | hash = ((hash << 5) + hash) + c; // hash*33 + c |
39 | } |
40 | return hash; |
41 | } |
42 | } |
43 | |
44 | static uint toTimeT(QDateTime &kdt) |
45 | { |
46 | if (kdt.isValid()) { |
47 | // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC |
48 | if (kdt.time().isNull() // |
49 | || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) { |
50 | kdt.setTimeSpec(Qt::UTC); |
51 | kdt.setTime(QTime(12, 0)); |
52 | } |
53 | return kdt.toMSecsSinceEpoch() / 1000; |
54 | } else { |
55 | return 0; |
56 | } |
57 | } |
58 | |
59 | uint parseISODate(const QString &str) |
60 | { |
61 | QDateTime kdt = QDateTime::fromString(str, Qt::ISODate); |
62 | return toTimeT(kdt); |
63 | } |
64 | |
65 | uint parseRFCDate(const QString &str) |
66 | { |
67 | QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date); |
68 | // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly |
69 | if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT" ))) { |
70 | kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date); |
71 | } |
72 | return toTimeT(kdt); |
73 | } |
74 | |
75 | uint parseDate(const QString &str, DateFormat hint) |
76 | { |
77 | if (str.isEmpty()) { |
78 | return 0; |
79 | } |
80 | |
81 | if (hint == RFCDate) { |
82 | time_t t = parseRFCDate(str); |
83 | return t != 0 ? t : parseISODate(str); |
84 | } else { |
85 | time_t t = parseISODate(str); |
86 | return t != 0 ? t : parseRFCDate(str); |
87 | } |
88 | } |
89 | |
90 | QString dateTimeToString(uint date) |
91 | { |
92 | if (date == 0) { |
93 | return QString(); |
94 | } |
95 | |
96 | const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy" ); |
97 | QDateTime dt; |
98 | dt.setMSecsSinceEpoch(quint64(date) * 1000); |
99 | return dt.toUTC().toString(format); |
100 | } |
101 | |
102 | QString calcMD5Sum(const QString &str) |
103 | { |
104 | md5Machine.reset(); |
105 | md5Machine.addData(str.toUtf8()); |
106 | return QLatin1String(md5Machine.result().toHex().constData()); |
107 | } |
108 | |
109 | QString resolveEntities(const QString &str) |
110 | { |
111 | return KCharsets::resolveEntities(str); |
112 | } |
113 | |
114 | QString escapeSpecialCharacters(const QString &strp) |
115 | { |
116 | QString str(strp); |
117 | str.replace(QLatin1Char('&'), QLatin1String("&" )); |
118 | str.replace(QLatin1Char('\"'), QLatin1String(""" )); |
119 | str.replace(QLatin1Char('<'), QLatin1String("<" )); |
120 | str.replace(QLatin1Char('>'), QLatin1String(">" )); |
121 | str.replace(QLatin1Char('\''), QLatin1String("'" )); |
122 | return str.trimmed(); |
123 | } |
124 | |
125 | QString convertNewlines(const QString &strp) |
126 | { |
127 | QString str(strp); |
128 | str.replace(QLatin1Char('\n'), QLatin1String("<br/>" )); |
129 | return str; |
130 | } |
131 | |
132 | QString plainTextToHtml(const QString &plainText) |
133 | { |
134 | QString str(plainText); |
135 | str.replace(QLatin1Char('&'), QLatin1String("&" )); |
136 | str.replace(QLatin1Char('\"'), QLatin1String(""" )); |
137 | str.replace(QLatin1Char('<'), QLatin1String("<" )); |
138 | // str.replace(QLatin1Char('>'), QLatin1String(">")); |
139 | str.replace(QLatin1Char('\n'), QLatin1String("<br/>" )); |
140 | return str.trimmed(); |
141 | } |
142 | |
143 | QString htmlToPlainText(const QString &html) |
144 | { |
145 | QString str(html); |
146 | // TODO: preserve some formatting, such as line breaks |
147 | str.remove(QRegularExpression(QStringLiteral("<[^>]*?>" ))); // remove tags |
148 | str = resolveEntities(str); |
149 | return str.trimmed(); |
150 | } |
151 | |
152 | static QRegularExpression tagRegExp() |
153 | { |
154 | static QRegularExpression exp(QStringLiteral("<\\w+.*/?>" )); |
155 | return exp; |
156 | } |
157 | |
158 | bool stringContainsMarkup(const QString &str) |
159 | { |
160 | // check for entities |
161 | if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;" )))) { |
162 | return true; |
163 | } |
164 | |
165 | const int ltc = str.count(QLatin1Char('<')); |
166 | if (ltc == 0) { |
167 | return false; |
168 | } |
169 | |
170 | return str.contains(tagRegExp()); |
171 | } |
172 | |
173 | bool isHtml(const QString &str) |
174 | { |
175 | // check for entities |
176 | if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;" )))) { |
177 | return true; |
178 | } |
179 | |
180 | const int ltc = str.count(QLatin1Char('<')); |
181 | if (ltc == 0) { |
182 | return false; |
183 | } |
184 | |
185 | return str.contains(tagRegExp()); |
186 | } |
187 | |
188 | QString normalize(const QString &str) |
189 | { |
190 | return isHtml(str) ? str.trimmed() : plainTextToHtml(str); |
191 | } |
192 | |
193 | QString normalize(const QString &strp, bool isCDATA, bool containsMarkup) |
194 | { |
195 | if (containsMarkup) { |
196 | return strp.trimmed(); |
197 | } else { |
198 | if (isCDATA) { |
199 | QString str = resolveEntities(strp); |
200 | str = escapeSpecialCharacters(str); |
201 | str = convertNewlines(str); |
202 | str = str.trimmed(); |
203 | return str; |
204 | } else { |
205 | QString str = escapeSpecialCharacters(strp); |
206 | str = str.trimmed(); |
207 | return str; |
208 | } |
209 | } |
210 | } |
211 | |
212 | PersonPtr personFromString(const QString &strp) |
213 | { |
214 | QString str = strp.trimmed(); |
215 | if (str.isEmpty()) { |
216 | return PersonPtr(new PersonImpl()); |
217 | } |
218 | |
219 | str = resolveEntities(str); |
220 | QString name; |
221 | QString uri; |
222 | QString email; |
223 | |
224 | // look for something looking like a mail address ("foo@bar.com", |
225 | // "<foo@bar.com>") and extract it |
226 | |
227 | const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?" )); // FIXME: user "proper" regexp, |
228 | // search kmail source for it |
229 | |
230 | QRegularExpressionMatch match = remail.match(str); |
231 | if (match.hasMatch()) { |
232 | const QString all = match.captured(0); |
233 | email = match.captured(1); |
234 | str.remove(all); // remove mail address |
235 | } |
236 | |
237 | // replace "mailto", "(", ")" (to be extended) |
238 | email.remove(QStringLiteral("mailto:" )); |
239 | email.remove(QRegularExpression(QStringLiteral("[()]" ))); |
240 | |
241 | // simplify the rest and use it as name |
242 | |
243 | name = str.simplified(); |
244 | |
245 | // after removing the email, str might have |
246 | // the format "(Foo M. Bar)". We cut off |
247 | // parentheses if there are any. However, if |
248 | // str is of the format "Foo M. Bar (President)", |
249 | // we should not cut anything. |
250 | |
251 | QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)" ))); |
252 | match = rename.match(name); |
253 | if (match.hasMatch()) { |
254 | name = match.captured(1); |
255 | } |
256 | |
257 | name = name.isEmpty() ? QString() : name; |
258 | email = email.isEmpty() ? QString() : email; |
259 | uri = uri.isEmpty() ? QString() : uri; |
260 | |
261 | if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) { |
262 | return PersonPtr(new PersonImpl()); |
263 | } |
264 | |
265 | return PersonPtr(new PersonImpl(name, uri, email)); |
266 | } |
267 | |
268 | ElementType::ElementType(const QString &localnamep, const QString &nsp) |
269 | : ns(nsp) |
270 | , localname(localnamep) |
271 | { |
272 | } |
273 | |
274 | bool ElementType::operator==(const ElementType &other) const |
275 | { |
276 | return localname == other.localname && ns == other.ns; |
277 | } |
278 | |
279 | } // namespace Syndication |
280 | |