1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "tools.h"
9#include "personimpl.h"
10
11#include <KCharsets>
12
13#include <QByteArray>
14#include <QCryptographicHash>
15#include <QDateTime>
16#include <QRegularExpression>
17
18#include <ctime>
19
20namespace Syndication
21{
22QCryptographicHash md5Machine(QCryptographicHash::Md5);
23
24unsigned int calcHash(const QString &str)
25{
26 return calcHash(str.toUtf8());
27}
28
29unsigned int calcHash(const QByteArray &array)
30{
31 if (array.isEmpty()) {
32 return 0;
33 } else {
34 const char *s = array.data();
35 unsigned int hash = 5381;
36 int c;
37 while ((c = *s++)) {
38 hash = ((hash << 5) + hash) + c; // hash*33 + c
39 }
40 return hash;
41 }
42}
43
44static uint toTimeT(QDateTime &kdt)
45{
46 if (kdt.isValid()) {
47 // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
48 if (kdt.time().isNull() //
49 || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
50 kdt.setTimeSpec(Qt::UTC);
51 kdt.setTime(QTime(12, 0));
52 }
53 return kdt.toMSecsSinceEpoch() / 1000;
54 } else {
55 return 0;
56 }
57}
58
59uint parseISODate(const QString &str)
60{
61 QDateTime kdt = QDateTime::fromString(str, Qt::ISODate);
62 return toTimeT(kdt);
63}
64
65uint parseRFCDate(const QString &str)
66{
67 QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date);
68 // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
69 if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
70 kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
71 }
72 return toTimeT(kdt);
73}
74
75uint parseDate(const QString &str, DateFormat hint)
76{
77 if (str.isEmpty()) {
78 return 0;
79 }
80
81 if (hint == RFCDate) {
82 time_t t = parseRFCDate(str);
83 return t != 0 ? t : parseISODate(str);
84 } else {
85 time_t t = parseISODate(str);
86 return t != 0 ? t : parseRFCDate(str);
87 }
88}
89
90QString dateTimeToString(uint date)
91{
92 if (date == 0) {
93 return QString();
94 }
95
96 const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
97 QDateTime dt;
98 dt.setMSecsSinceEpoch(quint64(date) * 1000);
99 return dt.toUTC().toString(format);
100}
101
102QString calcMD5Sum(const QString &str)
103{
104 md5Machine.reset();
105 md5Machine.addData(str.toUtf8());
106 return QLatin1String(md5Machine.result().toHex().constData());
107}
108
109QString resolveEntities(const QString &str)
110{
111 return KCharsets::resolveEntities(str);
112}
113
114QString escapeSpecialCharacters(const QString &strp)
115{
116 QString str(strp);
117 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
118 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
119 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
120 str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
121 str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
122 return str.trimmed();
123}
124
125QString convertNewlines(const QString &strp)
126{
127 QString str(strp);
128 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
129 return str;
130}
131
132QString plainTextToHtml(const QString &plainText)
133{
134 QString str(plainText);
135 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
136 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
137 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
138 // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
139 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
140 return str.trimmed();
141}
142
143QString htmlToPlainText(const QString &html)
144{
145 QString str(html);
146 // TODO: preserve some formatting, such as line breaks
147 str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
148 str = resolveEntities(str);
149 return str.trimmed();
150}
151
152static QRegularExpression tagRegExp()
153{
154 static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
155 return exp;
156}
157
158bool stringContainsMarkup(const QString &str)
159{
160 // check for entities
161 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
162 return true;
163 }
164
165 const int ltc = str.count(QLatin1Char('<'));
166 if (ltc == 0) {
167 return false;
168 }
169
170 return str.contains(tagRegExp());
171}
172
173bool isHtml(const QString &str)
174{
175 // check for entities
176 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
177 return true;
178 }
179
180 const int ltc = str.count(QLatin1Char('<'));
181 if (ltc == 0) {
182 return false;
183 }
184
185 return str.contains(tagRegExp());
186}
187
188QString normalize(const QString &str)
189{
190 return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
191}
192
193QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
194{
195 if (containsMarkup) {
196 return strp.trimmed();
197 } else {
198 if (isCDATA) {
199 QString str = resolveEntities(strp);
200 str = escapeSpecialCharacters(str);
201 str = convertNewlines(str);
202 str = str.trimmed();
203 return str;
204 } else {
205 QString str = escapeSpecialCharacters(strp);
206 str = str.trimmed();
207 return str;
208 }
209 }
210}
211
212PersonPtr personFromString(const QString &strp)
213{
214 QString str = strp.trimmed();
215 if (str.isEmpty()) {
216 return PersonPtr(new PersonImpl());
217 }
218
219 str = resolveEntities(str);
220 QString name;
221 QString uri;
222 QString email;
223
224 // look for something looking like a mail address ("foo@bar.com",
225 // "<foo@bar.com>") and extract it
226
227 const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
228 // search kmail source for it
229
230 QRegularExpressionMatch match = remail.match(str);
231 if (match.hasMatch()) {
232 const QString all = match.captured(0);
233 email = match.captured(1);
234 str.remove(all); // remove mail address
235 }
236
237 // replace "mailto", "(", ")" (to be extended)
238 email.remove(QStringLiteral("mailto:"));
239 email.remove(QRegularExpression(QStringLiteral("[()]")));
240
241 // simplify the rest and use it as name
242
243 name = str.simplified();
244
245 // after removing the email, str might have
246 // the format "(Foo M. Bar)". We cut off
247 // parentheses if there are any. However, if
248 // str is of the format "Foo M. Bar (President)",
249 // we should not cut anything.
250
251 QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
252 match = rename.match(name);
253 if (match.hasMatch()) {
254 name = match.captured(1);
255 }
256
257 name = name.isEmpty() ? QString() : name;
258 email = email.isEmpty() ? QString() : email;
259 uri = uri.isEmpty() ? QString() : uri;
260
261 if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
262 return PersonPtr(new PersonImpl());
263 }
264
265 return PersonPtr(new PersonImpl(name, uri, email));
266}
267
268ElementType::ElementType(const QString &localnamep, const QString &nsp)
269 : ns(nsp)
270 , localname(localnamep)
271{
272}
273
274bool ElementType::operator==(const ElementType &other) const
275{
276 return localname == other.localname && ns == other.ns;
277}
278
279} // namespace Syndication
280

source code of syndication/src/tools.cpp