1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "tools.h"
9#include "personimpl.h"
10
11#include <KCharsets>
12
13#include <QByteArray>
14#include <QCryptographicHash>
15#include <QDateTime>
16#include <QRegularExpression>
17#include <QTimeZone>
18
19#include <ctime>
20
21namespace Syndication
22{
23QCryptographicHash md5Machine(QCryptographicHash::Md5);
24
25unsigned int calcHash(const QString &str)
26{
27 return calcHash(array: str.toUtf8());
28}
29
30unsigned int calcHash(const QByteArray &array)
31{
32 if (array.isEmpty()) {
33 return 0;
34 } else {
35 const char *s = array.data();
36 unsigned int hash = 5381;
37 int c;
38 while ((c = *s++)) {
39 hash = ((hash << 5) + hash) + c; // hash*33 + c
40 }
41 return hash;
42 }
43}
44
45static uint toTimeT(QDateTime &kdt)
46{
47 if (kdt.isValid()) {
48 // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
49 if (kdt.time().isNull() //
50 || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
51 kdt.setTimeZone(toZone: QTimeZone::utc());
52 kdt.setTime(time: QTime(12, 0));
53 }
54 return kdt.toMSecsSinceEpoch() / 1000;
55 } else {
56 return 0;
57 }
58}
59
60uint parseISODate(const QString &str)
61{
62 QDateTime kdt = QDateTime::fromString(string: str, format: Qt::ISODate);
63 return toTimeT(kdt);
64}
65
66uint parseRFCDate(const QString &str)
67{
68 QDateTime kdt = QDateTime::fromString(string: str, format: Qt::RFC2822Date);
69 // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
70 if (!kdt.isValid()) {
71 kdt = QDateTime::fromString(string: QStringView(str).chopped(n: 4), format: Qt::RFC2822Date);
72 }
73 return toTimeT(kdt);
74}
75
76uint parseDate(const QString &str, DateFormat hint)
77{
78 if (str.isEmpty()) {
79 return 0;
80 }
81
82 if (hint == RFCDate) {
83 time_t t = parseRFCDate(str);
84 return t != 0 ? t : parseISODate(str);
85 } else {
86 time_t t = parseISODate(str);
87 return t != 0 ? t : parseRFCDate(str);
88 }
89}
90
91QString dateTimeToString(uint date)
92{
93 if (date == 0) {
94 return QString();
95 }
96
97 const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
98 QDateTime dt;
99 dt.setMSecsSinceEpoch(quint64(date) * 1000);
100 return dt.toUTC().toString(format);
101}
102
103QString calcMD5Sum(const QString &str)
104{
105 md5Machine.reset();
106 md5Machine.addData(data: str.toUtf8());
107 return QLatin1String(md5Machine.result().toHex().constData());
108}
109
110QString resolveEntities(const QString &str)
111{
112 return KCharsets::resolveEntities(text: str);
113}
114
115QString escapeSpecialCharacters(const QString &strp)
116{
117 QString str(strp);
118 str.replace(c: QLatin1Char('&'), after: QLatin1String("&amp;"));
119 str.replace(c: QLatin1Char('\"'), after: QLatin1String("&quot;"));
120 str.replace(c: QLatin1Char('<'), after: QLatin1String("&lt;"));
121 str.replace(c: QLatin1Char('>'), after: QLatin1String("&gt;"));
122 str.replace(c: QLatin1Char('\''), after: QLatin1String("&apos;"));
123 return str.trimmed();
124}
125
126QString convertNewlines(const QString &strp)
127{
128 QString str(strp);
129 str.replace(c: QLatin1Char('\n'), after: QLatin1String("<br/>"));
130 return str;
131}
132
133QString plainTextToHtml(const QString &plainText)
134{
135 QString str(plainText);
136 str.replace(c: QLatin1Char('&'), after: QLatin1String("&amp;"));
137 str.replace(c: QLatin1Char('\"'), after: QLatin1String("&quot;"));
138 str.replace(c: QLatin1Char('<'), after: QLatin1String("&lt;"));
139 // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
140 str.replace(c: QLatin1Char('\n'), after: QLatin1String("<br/>"));
141 return str.trimmed();
142}
143
144QString htmlToPlainText(const QString &html)
145{
146 QString str(html);
147 // TODO: preserve some formatting, such as line breaks
148 str.remove(re: QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
149 str = resolveEntities(str);
150 return str.trimmed();
151}
152
153static QRegularExpression tagRegExp()
154{
155 static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
156 return exp;
157}
158
159bool stringContainsMarkup(const QString &str)
160{
161 // check for entities
162 if (str.contains(re: QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
163 return true;
164 }
165
166 const int ltc = str.count(c: QLatin1Char('<'));
167 if (ltc == 0) {
168 return false;
169 }
170
171 return str.contains(re: tagRegExp());
172}
173
174bool isHtml(const QString &str)
175{
176 // check for entities
177 if (str.contains(re: QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
178 return true;
179 }
180
181 const int ltc = str.count(c: QLatin1Char('<'));
182 if (ltc == 0) {
183 return false;
184 }
185
186 return str.contains(re: tagRegExp());
187}
188
189QString normalize(const QString &str)
190{
191 return isHtml(str) ? str.trimmed() : plainTextToHtml(plainText: str);
192}
193
194QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
195{
196 if (containsMarkup) {
197 return strp.trimmed();
198 } else {
199 if (isCDATA) {
200 QString str = resolveEntities(str: strp);
201 str = escapeSpecialCharacters(strp: str);
202 str = convertNewlines(strp: str);
203 str = str.trimmed();
204 return str;
205 } else {
206 QString str = escapeSpecialCharacters(strp);
207 str = str.trimmed();
208 return str;
209 }
210 }
211}
212
213PersonPtr personFromString(const QString &strp)
214{
215 QString str = strp.trimmed();
216 if (str.isEmpty()) {
217 return PersonPtr(new PersonImpl());
218 }
219
220 str = resolveEntities(str);
221 QString name;
222 QString uri;
223 QString email;
224
225 // look for something looking like a mail address ("foo@bar.com",
226 // "<foo@bar.com>") and extract it
227
228 const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
229 // search kmail source for it
230
231 QRegularExpressionMatch match = remail.match(subject: str);
232 if (match.hasMatch()) {
233 const QString all = match.captured(nth: 0);
234 email = match.captured(nth: 1);
235 str.remove(s: all); // remove mail address
236 }
237
238 // replace "mailto", "(", ")" (to be extended)
239 email.remove(QStringLiteral("mailto:"));
240 email.remove(re: QRegularExpression(QStringLiteral("[()]")));
241
242 // simplify the rest and use it as name
243
244 name = str.simplified();
245
246 // after removing the email, str might have
247 // the format "(Foo M. Bar)". We cut off
248 // parentheses if there are any. However, if
249 // str is of the format "Foo M. Bar (President)",
250 // we should not cut anything.
251
252 QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
253 match = rename.match(subject: name);
254 if (match.hasMatch()) {
255 name = match.captured(nth: 1);
256 }
257
258 name = name.isEmpty() ? QString() : name;
259 email = email.isEmpty() ? QString() : email;
260 uri = uri.isEmpty() ? QString() : uri;
261
262 if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
263 return PersonPtr(new PersonImpl());
264 }
265
266 return PersonPtr(new PersonImpl(name, uri, email));
267}
268
269ElementType::ElementType(const QString &localnamep, const QString &nsp)
270 : ns(nsp)
271 , localname(localnamep)
272{
273}
274
275bool ElementType::operator==(const ElementType &other) const
276{
277 return localname == other.localname && ns == other.ns;
278}
279
280} // namespace Syndication
281

source code of syndication/src/tools.cpp