1 | /* |
2 | This file is part of the syndication library |
3 | SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
#include <rss2/category.h>
#include <rss2/cloud.h>
#include <rss2/document.h>
#include <rss2/image.h>
#include <rss2/item.h>
#include <rss2/textinput.h>

#include <constants.h>
#include <documentvisitor.h>
#include <tools.h>

#include <QDomDocument>
#include <QList>
#include <QSet>
#include <QString>

#include <algorithm>
#include <iterator>
#include <vector>
25 | |
26 | namespace Syndication |
27 | { |
28 | namespace RSS2 |
29 | { |
30 | class SYNDICATION_NO_EXPORT Document:: |
31 | { |
32 | public: |
33 | () |
34 | : itemDescriptionIsCDATA(false) |
35 | , itemDescriptionContainsMarkup(false) |
36 | , itemDescGuessed(false) |
37 | , itemTitleIsCDATA(false) |
38 | , itemTitleContainsMarkup(false) |
39 | , itemTitlesGuessed(false) |
40 | { |
41 | } |
42 | mutable bool ; |
43 | mutable bool ; |
44 | mutable bool ; |
45 | mutable bool ; |
46 | mutable bool ; |
47 | mutable bool ; |
48 | }; |
49 | |
50 | Document::(const QDomElement &element) |
51 | : SpecificDocument() |
52 | , ElementWrapper(element) |
53 | , d(new DocumentPrivate) |
54 | { |
55 | } |
56 | |
57 | Document Document::(const QDomDocument &doc) |
58 | { |
59 | QDomNode channelNode = doc.namedItem(QStringLiteral("rss" )).namedItem(QStringLiteral("channel" )); |
60 | |
61 | return Document(channelNode.toElement()); |
62 | } |
63 | |
64 | Document::() |
65 | : SpecificDocument() |
66 | , ElementWrapper() |
67 | , d(new DocumentPrivate) |
68 | { |
69 | } |
70 | |
71 | Document::(const Document &other) |
72 | : SpecificDocument(other) |
73 | , ElementWrapper(other) |
74 | { |
75 | d = other.d; |
76 | } |
77 | |
78 | Document::() |
79 | { |
80 | } |
81 | |
82 | Document &Document::(const Document &other) |
83 | { |
84 | ElementWrapper::operator=(other); |
85 | d = other.d; |
86 | return *this; |
87 | } |
88 | bool Document::() const |
89 | { |
90 | return !isNull(); |
91 | } |
92 | |
93 | QString Document::() const |
94 | { |
95 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("title" )); |
96 | } |
97 | |
98 | QString Document::() const |
99 | { |
100 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("link" )); |
101 | } |
102 | |
103 | QString Document::() const |
104 | { |
105 | const QString desc = extractElementTextNS(namespaceURI: QString(), QStringLiteral("description" )); |
106 | return normalize(str: desc); |
107 | } |
108 | |
109 | QString Document::() const |
110 | { |
111 | const QString lang = extractElementTextNS(namespaceURI: QString(), QStringLiteral("language" )); |
112 | |
113 | if (!lang.isNull()) { |
114 | return lang; |
115 | } else { |
116 | return extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("language" )); |
117 | } |
118 | } |
119 | |
120 | QString Document::() const |
121 | { |
122 | const QString rights = extractElementTextNS(namespaceURI: QString(), QStringLiteral("copyright" )); |
123 | if (!rights.isNull()) { |
124 | return rights; |
125 | } else { |
126 | // if <copyright> is not provided, use <dc:rights> |
127 | return extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("rights" )); |
128 | } |
129 | } |
130 | |
131 | QString Document::() const |
132 | { |
133 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("managingEditor" )); |
134 | } |
135 | |
136 | QString Document::() const |
137 | { |
138 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("webMaster" )); |
139 | } |
140 | |
141 | time_t Document::() const |
142 | { |
143 | QString str = extractElementTextNS(namespaceURI: QString(), QStringLiteral("pubDate" )); |
144 | |
145 | if (!str.isNull()) { |
146 | return parseDate(str, hint: RFCDate); |
147 | } else { |
148 | // if there is no pubDate, check for dc:date |
149 | str = extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("date" )); |
150 | return parseDate(str, hint: ISODate); |
151 | } |
152 | } |
153 | |
154 | time_t Document::() const |
155 | { |
156 | const QString str = extractElementTextNS(namespaceURI: QString(), QStringLiteral("lastBuildDate" )); |
157 | |
158 | return parseDate(str, hint: RFCDate); |
159 | } |
160 | |
161 | QList<Category> Document::() const |
162 | { |
163 | const QList<QDomElement> catNodes = elementsByTagNameNS(nsURI: QString(), QStringLiteral("category" )); |
164 | |
165 | QList<Category> categories; |
166 | categories.reserve(asize: catNodes.count()); |
167 | |
168 | std::transform(first: catNodes.cbegin(), last: catNodes.cend(), result: std::back_inserter(x&: categories), unary_op: [](const QDomElement &element) { |
169 | return Category(element); |
170 | }); |
171 | |
172 | return categories; |
173 | } |
174 | |
175 | QString Document::() const |
176 | { |
177 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("generator" )); |
178 | } |
179 | |
180 | QString Document::() const |
181 | { |
182 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("docs" )); |
183 | } |
184 | |
185 | Cloud Document::() const |
186 | { |
187 | return Cloud(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("cloud" ))); |
188 | } |
189 | |
190 | int Document::() const |
191 | { |
192 | bool ok; |
193 | int c; |
194 | |
195 | QString text = extractElementTextNS(namespaceURI: QString(), QStringLiteral("ttl" )); |
196 | c = text.toInt(ok: &ok); |
197 | return ok ? c : 0; |
198 | } |
199 | |
200 | Image Document::() const |
201 | { |
202 | return Image(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("image" ))); |
203 | } |
204 | |
205 | TextInput Document::() const |
206 | { |
207 | TextInput ti = TextInput(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("textInput" ))); |
208 | |
209 | if (!ti.isNull()) { |
210 | return ti; |
211 | } |
212 | |
213 | // Netscape's version of RSS 0.91 has textinput, not textInput |
214 | return TextInput(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("textinput" ))); |
215 | } |
216 | |
217 | QSet<int> Document::() const |
218 | { |
219 | QSet<int> skipHours; |
220 | QDomElement skipHoursNode = firstElementByTagNameNS(nsURI: QString(), QStringLiteral("skipHours" )); |
221 | if (!skipHoursNode.isNull()) { |
222 | ElementWrapper skipHoursWrapper(skipHoursNode); |
223 | bool ok = false; |
224 | const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(nsURI: QString(), QStringLiteral("hour" )); |
225 | for (const auto &element : hours) { |
226 | const int h = element.text().toInt(ok: &ok); |
227 | if (ok) { |
228 | skipHours.insert(value: h); |
229 | } |
230 | } |
231 | } |
232 | |
233 | return skipHours; |
234 | } |
235 | |
236 | QSet<Document::DayOfWeek> Document::() const |
237 | { |
238 | QSet<DayOfWeek> skipDays; |
239 | QDomElement skipDaysNode = firstElementByTagNameNS(nsURI: QString(), QStringLiteral("skipDays" )); |
240 | if (!skipDaysNode.isNull()) { |
241 | ElementWrapper skipDaysWrapper(skipDaysNode); |
242 | struct DayInfo { |
243 | QLatin1String name; |
244 | DayOfWeek enumValue; |
245 | }; |
246 | static const std::vector<DayInfo> weekDays = { |
247 | {.name: QLatin1String("Monday" ), .enumValue: Monday}, |
248 | {.name: QLatin1String("Tuesday" ), .enumValue: Tuesday}, |
249 | {.name: QLatin1String("Wednesday" ), .enumValue: Wednesday}, |
250 | {.name: QLatin1String("Thursday" ), .enumValue: Thursday}, |
251 | {.name: QLatin1String("Friday" ), .enumValue: Friday}, |
252 | {.name: QLatin1String("Saturday" ), .enumValue: Saturday}, |
253 | {.name: QLatin1String("Sunday" ), .enumValue: Sunday}, |
254 | }; |
255 | |
256 | const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(nsURI: QString(), QStringLiteral("day" )); |
257 | for (const auto &element : days) { |
258 | const QString day = element.text(); |
259 | auto it = std::find_if(first: weekDays.cbegin(), last: weekDays.cend(), pred: [&day](const DayInfo &info) { |
260 | return info.name == day; |
261 | }); |
262 | if (it != weekDays.cend()) { |
263 | skipDays.insert(value: it->enumValue); |
264 | } |
265 | } |
266 | } |
267 | |
268 | return skipDays; |
269 | } |
270 | |
271 | QList<Item> Document::() const |
272 | { |
273 | const QList<QDomElement> itemNodes = elementsByTagNameNS(nsURI: QString(), QStringLiteral("item" )); |
274 | |
275 | QList<Item> items; |
276 | items.reserve(asize: itemNodes.count()); |
277 | |
278 | DocumentPtr doccpy(new Document(*this)); |
279 | |
280 | std::transform(first: itemNodes.cbegin(), last: itemNodes.cend(), result: std::back_inserter(x&: items), unary_op: [&doccpy](const QDomElement &element) { |
281 | return Item(element, doccpy); |
282 | }); |
283 | |
284 | return items; |
285 | } |
286 | QList<QDomElement> Document::unhandledElements() const |
287 | { |
288 | // TODO: do not hardcode this list here |
289 | static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList |
290 | if (handled.empty()) { |
291 | handled.reserve(n: 22); |
292 | handled.push_back(x: ElementType(QStringLiteral("title" ))); |
293 | handled.push_back(x: ElementType(QStringLiteral("link" ))); |
294 | handled.push_back(x: ElementType(QStringLiteral("description" ))); |
295 | handled.push_back(x: ElementType(QStringLiteral("language" ))); |
296 | handled.push_back(x: ElementType(QStringLiteral("copyright" ))); |
297 | handled.push_back(x: ElementType(QStringLiteral("managingEditor" ))); |
298 | handled.push_back(x: ElementType(QStringLiteral("webMaster" ))); |
299 | handled.push_back(x: ElementType(QStringLiteral("pubDate" ))); |
300 | handled.push_back(x: ElementType(QStringLiteral("lastBuildDate" ))); |
301 | handled.push_back(x: ElementType(QStringLiteral("skipDays" ))); |
302 | handled.push_back(x: ElementType(QStringLiteral("skipHours" ))); |
303 | handled.push_back(x: ElementType(QStringLiteral("item" ))); |
304 | handled.push_back(x: ElementType(QStringLiteral("textinput" ))); |
305 | handled.push_back(x: ElementType(QStringLiteral("textInput" ))); |
306 | handled.push_back(x: ElementType(QStringLiteral("image" ))); |
307 | handled.push_back(x: ElementType(QStringLiteral("ttl" ))); |
308 | handled.push_back(x: ElementType(QStringLiteral("generator" ))); |
309 | handled.push_back(x: ElementType(QStringLiteral("docs" ))); |
310 | handled.push_back(x: ElementType(QStringLiteral("cloud" ))); |
311 | handled.push_back(x: ElementType(QStringLiteral("language" ), dublinCoreNamespace())); |
312 | handled.push_back(x: ElementType(QStringLiteral("rights" ), dublinCoreNamespace())); |
313 | handled.push_back(x: ElementType(QStringLiteral("date" ), dublinCoreNamespace())); |
314 | } |
315 | |
316 | QList<QDomElement> notHandled; |
317 | |
318 | QDomNodeList children = element().childNodes(); |
319 | const int numChildren = children.size(); |
320 | for (int i = 0; i < numChildren; ++i) { |
321 | QDomElement el = children.at(index: i).toElement(); |
322 | if (!el.isNull() // |
323 | && std::find(first: handled.cbegin(), last: handled.cend(), val: ElementType(el.localName(), el.namespaceURI())) == handled.cend()) { |
324 | notHandled.append(t: el); |
325 | } |
326 | } |
327 | |
328 | return notHandled; |
329 | } |
330 | |
331 | QString Document::() const |
332 | { |
333 | QString info; |
334 | info += QLatin1String("### Document: ###################\n" ); |
335 | if (!title().isNull()) { |
336 | info += QLatin1String("title: #" ) + title() + QLatin1String("#\n" ); |
337 | } |
338 | if (!description().isNull()) { |
339 | info += QLatin1String("description: #" ) + description() + QLatin1String("#\n" ); |
340 | } |
341 | if (!link().isNull()) { |
342 | info += QLatin1String("link: #" ) + link() + QLatin1String("#\n" ); |
343 | } |
344 | if (!language().isNull()) { |
345 | info += QLatin1String("language: #" ) + language() + QLatin1String("#\n" ); |
346 | } |
347 | if (!copyright().isNull()) { |
348 | info += QLatin1String("copyright: #" ) + copyright() + QLatin1String("#\n" ); |
349 | } |
350 | if (!managingEditor().isNull()) { |
351 | info += QLatin1String("managingEditor: #" ) + managingEditor() + QLatin1String("#\n" ); |
352 | } |
353 | if (!webMaster().isNull()) { |
354 | info += QLatin1String("webMaster: #" ) + webMaster() + QLatin1String("#\n" ); |
355 | } |
356 | |
357 | QString dpubdate = dateTimeToString(date: pubDate()); |
358 | if (!dpubdate.isNull()) { |
359 | info += QLatin1String("pubDate: #" ) + dpubdate + QLatin1String("#\n" ); |
360 | } |
361 | |
362 | QString dlastbuilddate = dateTimeToString(date: lastBuildDate()); |
363 | if (!dlastbuilddate.isNull()) { |
364 | info += QLatin1String("lastBuildDate: #" ) + dlastbuilddate + QLatin1String("#\n" ); |
365 | } |
366 | |
367 | if (!textInput().isNull()) { |
368 | info += textInput().debugInfo(); |
369 | } |
370 | if (!cloud().isNull()) { |
371 | info += cloud().debugInfo(); |
372 | } |
373 | if (!image().isNull()) { |
374 | info += image().debugInfo(); |
375 | } |
376 | |
377 | const QList<Category> cats = categories(); |
378 | |
379 | for (const auto &c : cats) { |
380 | info += c.debugInfo(); |
381 | } |
382 | |
383 | const QList<Item> litems = items(); |
384 | for (const auto &item : litems) { |
385 | info += item.debugInfo(); |
386 | } |
387 | info += QLatin1String("### Document end ################\n" ); |
388 | return info; |
389 | } |
390 | |
391 | void Document::(bool *isCDATA, bool *containsMarkup) const |
392 | { |
393 | if (!d->itemTitlesGuessed) { |
394 | QString titles; |
395 | QList<Item> litems = items(); |
396 | |
397 | if (litems.isEmpty()) { |
398 | d->itemTitlesGuessed = true; |
399 | return; |
400 | } |
401 | |
402 | QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(nsURI: QString(), QStringLiteral("title" )); |
403 | d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection(); |
404 | |
405 | const int nmax = std::min<int>(a: litems.size(), b: 10); // we check a maximum of 10 items |
406 | int i = 0; |
407 | |
408 | for (const auto &item : litems) { |
409 | if (i++ >= nmax) { |
410 | break; |
411 | } |
412 | titles += item.originalTitle(); |
413 | } |
414 | |
415 | d->itemTitleContainsMarkup = stringContainsMarkup(str: titles); |
416 | d->itemTitlesGuessed = true; |
417 | } |
418 | |
419 | if (isCDATA != nullptr) { |
420 | *isCDATA = d->itemTitleIsCDATA; |
421 | } |
422 | if (containsMarkup != nullptr) { |
423 | *containsMarkup = d->itemTitleContainsMarkup; |
424 | } |
425 | } |
426 | |
427 | void Document::(bool *isCDATA, bool *containsMarkup) const |
428 | { |
429 | if (!d->itemDescGuessed) { |
430 | QString desc; |
431 | QList<Item> litems = items(); |
432 | |
433 | if (litems.isEmpty()) { |
434 | d->itemDescGuessed = true; |
435 | return; |
436 | } |
437 | |
438 | QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(nsURI: QString(), QStringLiteral("description" )); |
439 | d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection(); |
440 | |
441 | const int nmax = std::min<int>(a: litems.size(), b: 10); // we check a maximum of 10 items |
442 | int i = 0; |
443 | |
444 | for (const auto &item : litems) { |
445 | if (i++ >= nmax) { |
446 | break; |
447 | } |
448 | desc += item.originalDescription(); |
449 | } |
450 | |
451 | d->itemDescriptionContainsMarkup = stringContainsMarkup(str: desc); |
452 | d->itemDescGuessed = true; |
453 | } |
454 | |
455 | if (isCDATA != nullptr) { |
456 | *isCDATA = d->itemDescriptionIsCDATA; |
457 | } |
458 | if (containsMarkup != nullptr) { |
459 | *containsMarkup = d->itemDescriptionContainsMarkup; |
460 | } |
461 | } |
462 | |
463 | bool Document::(DocumentVisitor *visitor) |
464 | { |
465 | return visitor->visitRSS2Document(document: this); |
466 | } |
467 | |
468 | } // namespace RSS2 |
469 | } // namespace Syndication |
470 | |