| 1 | /* |
| 2 | This file is part of the syndication library |
| 3 | SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 6 | */ |
| 7 | |
| 8 | #include <rss2/category.h> |
| 9 | #include <rss2/cloud.h> |
| 10 | #include <rss2/document.h> |
| 11 | #include <rss2/image.h> |
| 12 | #include <rss2/item.h> |
| 13 | #include <rss2/textinput.h> |
| 14 | |
| 15 | #include <constants.h> |
| 16 | #include <documentvisitor.h> |
| 17 | #include <tools.h> |
| 18 | |
| 19 | #include <QDomDocument> |
| 20 | #include <QList> |
| 21 | #include <QSet> |
| 22 | #include <QString> |
| 23 | |
| 24 | #include <vector> |
| 25 | |
| 26 | namespace Syndication |
| 27 | { |
| 28 | namespace RSS2 |
| 29 | { |
| 30 | class SYNDICATION_NO_EXPORT Document:: |
| 31 | { |
| 32 | public: |
| 33 | () |
| 34 | : itemDescriptionIsCDATA(false) |
| 35 | , itemDescriptionContainsMarkup(false) |
| 36 | , itemDescGuessed(false) |
| 37 | , itemTitleIsCDATA(false) |
| 38 | , itemTitleContainsMarkup(false) |
| 39 | , itemTitlesGuessed(false) |
| 40 | { |
| 41 | } |
| 42 | mutable bool ; |
| 43 | mutable bool ; |
| 44 | mutable bool ; |
| 45 | mutable bool ; |
| 46 | mutable bool ; |
| 47 | mutable bool ; |
| 48 | }; |
| 49 | |
| 50 | Document::(const QDomElement &element) |
| 51 | : SpecificDocument() |
| 52 | , ElementWrapper(element) |
| 53 | , d(new DocumentPrivate) |
| 54 | { |
| 55 | } |
| 56 | |
| 57 | Document Document::(const QDomDocument &doc) |
| 58 | { |
| 59 | QDomNode channelNode = doc.namedItem(QStringLiteral("rss" )).namedItem(QStringLiteral("channel" )); |
| 60 | |
| 61 | return Document(channelNode.toElement()); |
| 62 | } |
| 63 | |
| 64 | Document::() |
| 65 | : SpecificDocument() |
| 66 | , ElementWrapper() |
| 67 | , d(new DocumentPrivate) |
| 68 | { |
| 69 | } |
| 70 | |
| 71 | Document::(const Document &other) |
| 72 | : SpecificDocument(other) |
| 73 | , ElementWrapper(other) |
| 74 | { |
| 75 | d = other.d; |
| 76 | } |
| 77 | |
| 78 | Document::() |
| 79 | { |
| 80 | } |
| 81 | |
| 82 | Document &Document::(const Document &other) |
| 83 | { |
| 84 | ElementWrapper::operator=(other); |
| 85 | d = other.d; |
| 86 | return *this; |
| 87 | } |
| 88 | bool Document::() const |
| 89 | { |
| 90 | return !isNull(); |
| 91 | } |
| 92 | |
| 93 | QString Document::() const |
| 94 | { |
| 95 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("title" )); |
| 96 | } |
| 97 | |
| 98 | QString Document::() const |
| 99 | { |
| 100 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("link" )); |
| 101 | } |
| 102 | |
| 103 | QString Document::() const |
| 104 | { |
| 105 | const QString desc = extractElementTextNS(namespaceURI: QString(), QStringLiteral("description" )); |
| 106 | return normalize(str: desc); |
| 107 | } |
| 108 | |
| 109 | QString Document::() const |
| 110 | { |
| 111 | const QString lang = extractElementTextNS(namespaceURI: QString(), QStringLiteral("language" )); |
| 112 | |
| 113 | if (!lang.isNull()) { |
| 114 | return lang; |
| 115 | } else { |
| 116 | return extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("language" )); |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | QString Document::() const |
| 121 | { |
| 122 | const QString rights = extractElementTextNS(namespaceURI: QString(), QStringLiteral("copyright" )); |
| 123 | if (!rights.isNull()) { |
| 124 | return rights; |
| 125 | } else { |
| 126 | // if <copyright> is not provided, use <dc:rights> |
| 127 | return extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("rights" )); |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | QString Document::() const |
| 132 | { |
| 133 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("managingEditor" )); |
| 134 | } |
| 135 | |
| 136 | QString Document::() const |
| 137 | { |
| 138 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("webMaster" )); |
| 139 | } |
| 140 | |
| 141 | time_t Document::() const |
| 142 | { |
| 143 | QString str = extractElementTextNS(namespaceURI: QString(), QStringLiteral("pubDate" )); |
| 144 | |
| 145 | if (!str.isNull()) { |
| 146 | return parseDate(str, hint: RFCDate); |
| 147 | } else { |
| 148 | // if there is no pubDate, check for dc:date |
| 149 | str = extractElementTextNS(namespaceURI: dublinCoreNamespace(), QStringLiteral("date" )); |
| 150 | return parseDate(str, hint: ISODate); |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | time_t Document::() const |
| 155 | { |
| 156 | const QString str = extractElementTextNS(namespaceURI: QString(), QStringLiteral("lastBuildDate" )); |
| 157 | |
| 158 | return parseDate(str, hint: RFCDate); |
| 159 | } |
| 160 | |
| 161 | QList<Category> Document::() const |
| 162 | { |
| 163 | const QList<QDomElement> catNodes = elementsByTagNameNS(nsURI: QString(), QStringLiteral("category" )); |
| 164 | |
| 165 | QList<Category> categories; |
| 166 | categories.reserve(asize: catNodes.count()); |
| 167 | |
| 168 | std::transform(first: catNodes.cbegin(), last: catNodes.cend(), result: std::back_inserter(x&: categories), unary_op: [](const QDomElement &element) { |
| 169 | return Category(element); |
| 170 | }); |
| 171 | |
| 172 | return categories; |
| 173 | } |
| 174 | |
| 175 | QString Document::() const |
| 176 | { |
| 177 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("generator" )); |
| 178 | } |
| 179 | |
| 180 | QString Document::() const |
| 181 | { |
| 182 | return extractElementTextNS(namespaceURI: QString(), QStringLiteral("docs" )); |
| 183 | } |
| 184 | |
| 185 | Cloud Document::() const |
| 186 | { |
| 187 | return Cloud(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("cloud" ))); |
| 188 | } |
| 189 | |
| 190 | int Document::() const |
| 191 | { |
| 192 | bool ok; |
| 193 | int c; |
| 194 | |
| 195 | QString text = extractElementTextNS(namespaceURI: QString(), QStringLiteral("ttl" )); |
| 196 | c = text.toInt(ok: &ok); |
| 197 | return ok ? c : 0; |
| 198 | } |
| 199 | |
| 200 | Image Document::() const |
| 201 | { |
| 202 | return Image(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("image" ))); |
| 203 | } |
| 204 | |
| 205 | TextInput Document::() const |
| 206 | { |
| 207 | TextInput ti = TextInput(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("textInput" ))); |
| 208 | |
| 209 | if (!ti.isNull()) { |
| 210 | return ti; |
| 211 | } |
| 212 | |
| 213 | // Netscape's version of RSS 0.91 has textinput, not textInput |
| 214 | return TextInput(firstElementByTagNameNS(nsURI: QString(), QStringLiteral("textinput" ))); |
| 215 | } |
| 216 | |
| 217 | QSet<int> Document::() const |
| 218 | { |
| 219 | QSet<int> skipHours; |
| 220 | QDomElement skipHoursNode = firstElementByTagNameNS(nsURI: QString(), QStringLiteral("skipHours" )); |
| 221 | if (!skipHoursNode.isNull()) { |
| 222 | ElementWrapper skipHoursWrapper(skipHoursNode); |
| 223 | bool ok = false; |
| 224 | const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(nsURI: QString(), QStringLiteral("hour" )); |
| 225 | for (const auto &element : hours) { |
| 226 | const int h = element.text().toInt(ok: &ok); |
| 227 | if (ok) { |
| 228 | skipHours.insert(value: h); |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | return skipHours; |
| 234 | } |
| 235 | |
| 236 | QSet<Document::DayOfWeek> Document::() const |
| 237 | { |
| 238 | QSet<DayOfWeek> skipDays; |
| 239 | QDomElement skipDaysNode = firstElementByTagNameNS(nsURI: QString(), QStringLiteral("skipDays" )); |
| 240 | if (!skipDaysNode.isNull()) { |
| 241 | ElementWrapper skipDaysWrapper(skipDaysNode); |
| 242 | struct DayInfo { |
| 243 | QLatin1String name; |
| 244 | DayOfWeek enumValue; |
| 245 | }; |
| 246 | static const std::vector<DayInfo> weekDays = { |
| 247 | {.name: QLatin1String("Monday" ), .enumValue: Monday}, |
| 248 | {.name: QLatin1String("Tuesday" ), .enumValue: Tuesday}, |
| 249 | {.name: QLatin1String("Wednesday" ), .enumValue: Wednesday}, |
| 250 | {.name: QLatin1String("Thursday" ), .enumValue: Thursday}, |
| 251 | {.name: QLatin1String("Friday" ), .enumValue: Friday}, |
| 252 | {.name: QLatin1String("Saturday" ), .enumValue: Saturday}, |
| 253 | {.name: QLatin1String("Sunday" ), .enumValue: Sunday}, |
| 254 | }; |
| 255 | |
| 256 | const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(nsURI: QString(), QStringLiteral("day" )); |
| 257 | for (const auto &element : days) { |
| 258 | const QString day = element.text(); |
| 259 | auto it = std::find_if(first: weekDays.cbegin(), last: weekDays.cend(), pred: [&day](const DayInfo &info) { |
| 260 | return info.name == day; |
| 261 | }); |
| 262 | if (it != weekDays.cend()) { |
| 263 | skipDays.insert(value: it->enumValue); |
| 264 | } |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | return skipDays; |
| 269 | } |
| 270 | |
| 271 | QList<Item> Document::() const |
| 272 | { |
| 273 | const QList<QDomElement> itemNodes = elementsByTagNameNS(nsURI: QString(), QStringLiteral("item" )); |
| 274 | |
| 275 | QList<Item> items; |
| 276 | items.reserve(asize: itemNodes.count()); |
| 277 | |
| 278 | DocumentPtr doccpy(new Document(*this)); |
| 279 | |
| 280 | std::transform(first: itemNodes.cbegin(), last: itemNodes.cend(), result: std::back_inserter(x&: items), unary_op: [&doccpy](const QDomElement &element) { |
| 281 | return Item(element, doccpy); |
| 282 | }); |
| 283 | |
| 284 | return items; |
| 285 | } |
| 286 | QList<QDomElement> Document::unhandledElements() const |
| 287 | { |
| 288 | // TODO: do not hardcode this list here |
| 289 | static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList |
| 290 | if (handled.empty()) { |
| 291 | handled.reserve(n: 22); |
| 292 | handled.push_back(x: ElementType(QStringLiteral("title" ))); |
| 293 | handled.push_back(x: ElementType(QStringLiteral("link" ))); |
| 294 | handled.push_back(x: ElementType(QStringLiteral("description" ))); |
| 295 | handled.push_back(x: ElementType(QStringLiteral("language" ))); |
| 296 | handled.push_back(x: ElementType(QStringLiteral("copyright" ))); |
| 297 | handled.push_back(x: ElementType(QStringLiteral("managingEditor" ))); |
| 298 | handled.push_back(x: ElementType(QStringLiteral("webMaster" ))); |
| 299 | handled.push_back(x: ElementType(QStringLiteral("pubDate" ))); |
| 300 | handled.push_back(x: ElementType(QStringLiteral("lastBuildDate" ))); |
| 301 | handled.push_back(x: ElementType(QStringLiteral("skipDays" ))); |
| 302 | handled.push_back(x: ElementType(QStringLiteral("skipHours" ))); |
| 303 | handled.push_back(x: ElementType(QStringLiteral("item" ))); |
| 304 | handled.push_back(x: ElementType(QStringLiteral("textinput" ))); |
| 305 | handled.push_back(x: ElementType(QStringLiteral("textInput" ))); |
| 306 | handled.push_back(x: ElementType(QStringLiteral("image" ))); |
| 307 | handled.push_back(x: ElementType(QStringLiteral("ttl" ))); |
| 308 | handled.push_back(x: ElementType(QStringLiteral("generator" ))); |
| 309 | handled.push_back(x: ElementType(QStringLiteral("docs" ))); |
| 310 | handled.push_back(x: ElementType(QStringLiteral("cloud" ))); |
| 311 | handled.push_back(x: ElementType(QStringLiteral("language" ), dublinCoreNamespace())); |
| 312 | handled.push_back(x: ElementType(QStringLiteral("rights" ), dublinCoreNamespace())); |
| 313 | handled.push_back(x: ElementType(QStringLiteral("date" ), dublinCoreNamespace())); |
| 314 | } |
| 315 | |
| 316 | QList<QDomElement> notHandled; |
| 317 | |
| 318 | QDomNodeList children = element().childNodes(); |
| 319 | const int numChildren = children.size(); |
| 320 | for (int i = 0; i < numChildren; ++i) { |
| 321 | QDomElement el = children.at(index: i).toElement(); |
| 322 | if (!el.isNull() // |
| 323 | && std::find(first: handled.cbegin(), last: handled.cend(), val: ElementType(el.localName(), el.namespaceURI())) == handled.cend()) { |
| 324 | notHandled.append(t: el); |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | return notHandled; |
| 329 | } |
| 330 | |
| 331 | QString Document::() const |
| 332 | { |
| 333 | QString info; |
| 334 | info += QLatin1String("### Document: ###################\n" ); |
| 335 | if (!title().isNull()) { |
| 336 | info += QLatin1String("title: #" ) + title() + QLatin1String("#\n" ); |
| 337 | } |
| 338 | if (!description().isNull()) { |
| 339 | info += QLatin1String("description: #" ) + description() + QLatin1String("#\n" ); |
| 340 | } |
| 341 | if (!link().isNull()) { |
| 342 | info += QLatin1String("link: #" ) + link() + QLatin1String("#\n" ); |
| 343 | } |
| 344 | if (!language().isNull()) { |
| 345 | info += QLatin1String("language: #" ) + language() + QLatin1String("#\n" ); |
| 346 | } |
| 347 | if (!copyright().isNull()) { |
| 348 | info += QLatin1String("copyright: #" ) + copyright() + QLatin1String("#\n" ); |
| 349 | } |
| 350 | if (!managingEditor().isNull()) { |
| 351 | info += QLatin1String("managingEditor: #" ) + managingEditor() + QLatin1String("#\n" ); |
| 352 | } |
| 353 | if (!webMaster().isNull()) { |
| 354 | info += QLatin1String("webMaster: #" ) + webMaster() + QLatin1String("#\n" ); |
| 355 | } |
| 356 | |
| 357 | QString dpubdate = dateTimeToString(date: pubDate()); |
| 358 | if (!dpubdate.isNull()) { |
| 359 | info += QLatin1String("pubDate: #" ) + dpubdate + QLatin1String("#\n" ); |
| 360 | } |
| 361 | |
| 362 | QString dlastbuilddate = dateTimeToString(date: lastBuildDate()); |
| 363 | if (!dlastbuilddate.isNull()) { |
| 364 | info += QLatin1String("lastBuildDate: #" ) + dlastbuilddate + QLatin1String("#\n" ); |
| 365 | } |
| 366 | |
| 367 | if (!textInput().isNull()) { |
| 368 | info += textInput().debugInfo(); |
| 369 | } |
| 370 | if (!cloud().isNull()) { |
| 371 | info += cloud().debugInfo(); |
| 372 | } |
| 373 | if (!image().isNull()) { |
| 374 | info += image().debugInfo(); |
| 375 | } |
| 376 | |
| 377 | const QList<Category> cats = categories(); |
| 378 | |
| 379 | for (const auto &c : cats) { |
| 380 | info += c.debugInfo(); |
| 381 | } |
| 382 | |
| 383 | const QList<Item> litems = items(); |
| 384 | for (const auto &item : litems) { |
| 385 | info += item.debugInfo(); |
| 386 | } |
| 387 | info += QLatin1String("### Document end ################\n" ); |
| 388 | return info; |
| 389 | } |
| 390 | |
| 391 | void Document::(bool *isCDATA, bool *containsMarkup) const |
| 392 | { |
| 393 | if (!d->itemTitlesGuessed) { |
| 394 | QString titles; |
| 395 | QList<Item> litems = items(); |
| 396 | |
| 397 | if (litems.isEmpty()) { |
| 398 | d->itemTitlesGuessed = true; |
| 399 | return; |
| 400 | } |
| 401 | |
| 402 | QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(nsURI: QString(), QStringLiteral("title" )); |
| 403 | d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection(); |
| 404 | |
| 405 | const int nmax = std::min<int>(a: litems.size(), b: 10); // we check a maximum of 10 items |
| 406 | int i = 0; |
| 407 | |
| 408 | for (const auto &item : litems) { |
| 409 | if (i++ >= nmax) { |
| 410 | break; |
| 411 | } |
| 412 | titles += item.originalTitle(); |
| 413 | } |
| 414 | |
| 415 | d->itemTitleContainsMarkup = stringContainsMarkup(str: titles); |
| 416 | d->itemTitlesGuessed = true; |
| 417 | } |
| 418 | |
| 419 | if (isCDATA != nullptr) { |
| 420 | *isCDATA = d->itemTitleIsCDATA; |
| 421 | } |
| 422 | if (containsMarkup != nullptr) { |
| 423 | *containsMarkup = d->itemTitleContainsMarkup; |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | void Document::(bool *isCDATA, bool *containsMarkup) const |
| 428 | { |
| 429 | if (!d->itemDescGuessed) { |
| 430 | QString desc; |
| 431 | QList<Item> litems = items(); |
| 432 | |
| 433 | if (litems.isEmpty()) { |
| 434 | d->itemDescGuessed = true; |
| 435 | return; |
| 436 | } |
| 437 | |
| 438 | QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(nsURI: QString(), QStringLiteral("description" )); |
| 439 | d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection(); |
| 440 | |
| 441 | const int nmax = std::min<int>(a: litems.size(), b: 10); // we check a maximum of 10 items |
| 442 | int i = 0; |
| 443 | |
| 444 | for (const auto &item : litems) { |
| 445 | if (i++ >= nmax) { |
| 446 | break; |
| 447 | } |
| 448 | desc += item.originalDescription(); |
| 449 | } |
| 450 | |
| 451 | d->itemDescriptionContainsMarkup = stringContainsMarkup(str: desc); |
| 452 | d->itemDescGuessed = true; |
| 453 | } |
| 454 | |
| 455 | if (isCDATA != nullptr) { |
| 456 | *isCDATA = d->itemDescriptionIsCDATA; |
| 457 | } |
| 458 | if (containsMarkup != nullptr) { |
| 459 | *containsMarkup = d->itemDescriptionContainsMarkup; |
| 460 | } |
| 461 | } |
| 462 | |
| 463 | bool Document::(DocumentVisitor *visitor) |
| 464 | { |
| 465 | return visitor->visitRSS2Document(document: this); |
| 466 | } |
| 467 | |
| 468 | } // namespace RSS2 |
| 469 | } // namespace Syndication |
| 470 | |