| 1 | /* |
| 2 | This file is part of the syndication library |
| 3 | SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 6 | */ |
| 7 | |
| 8 | #ifndef SYNDICATION_TOOLS_H |
| 9 | #define SYNDICATION_TOOLS_H |
| 10 | |
| 11 | #include "person.h" |
| 12 | #include "syndication_export.h" |
| 13 | |
| 14 | #include <QString> |
| 15 | |
| 16 | class QByteArray; |
| 17 | class QString; |
| 18 | |
| 19 | namespace Syndication |
| 20 | { |
| 21 | /*! |
| 22 | * Date formats supported by the date parsers; also used as the format hint for parseDate(). |
| 23 | * |
| 24 | * \value ISODate ISO 8601 extended format (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25", datetime with timezone: "2003-12-13T18:30:02.25+01:00"). |
| 25 | * \value RFCDate RFC 822 (e.g. "Sat, 07 Sep 2002 00:00:01 GMT"). |
| 26 | */ |
| 27 | enum DateFormat { |
| 28 | ISODate, |
| 29 | RFCDate, |
| 30 | }; |
| 31 | |
| 32 | /*! |
| 33 | * Parses a date string in ISO 8601 extended format. |
| 34 | * Examples: date "2003-12-13", datetime "2003-12-13T18:30:02.25", |
| 35 | * datetime with timezone "2003-12-13T18:30:02.25+01:00". |
| 36 | * |
| 37 | * \a str a string in ISO 8601 format |
| 38 | * |
| 39 | * Returns the parsed date in seconds since the epoch, or 0 if no date could |
| 40 | * be parsed from the string (so a failure cannot be told apart from the epoch itself). |
| 41 | */ |
| 42 | SYNDICATION_EXPORT |
| 43 | uint parseISODate(const QString &str); |
| 44 | |
| 45 | /*! |
| 46 | * Parses a date string in the format defined by RFC 822 |
| 47 | * (e.g. "Sat, 07 Sep 2002 00:00:01 GMT"). |
| 48 | * |
| 49 | * \a str a string in RFC 822 format |
| 50 | * |
| 51 | * Returns the parsed date in seconds since the epoch, or 0 if no date could |
| 52 | * be parsed from the string (so a failure cannot be told apart from the epoch itself). |
| 53 | */ |
| 54 | SYNDICATION_EXPORT |
| 55 | uint parseRFCDate(const QString &str); |
| 56 | |
| 57 | /*! |
| 58 | * Parses a date string in either ISO 8601 (see parseISODate()) or RFC 822 (see |
| 59 | * parseRFCDate()) format. |
| 60 | * |
| 61 | * Both parsers are tried, and the first valid parsing result is returned (or 0 |
| 62 | * if neither parser succeeds). |
| 63 | * |
| 64 | * To speed up parsing, you can pass a hint naming the format you expect; |
| 65 | * the corresponding parser is then tried first. |
| 66 | * |
| 67 | * \a str a date string |
| 68 | * |
| 69 | * \a hint the expected format; defaults to RFCDate |
| 70 | * |
| 71 | * Returns the parsed date in seconds since the epoch, or 0 if no date could |
| 72 | * be parsed from the string. |
| 73 | */ |
| 74 | SYNDICATION_EXPORT |
| 75 | uint parseDate(const QString &str, DateFormat hint = RFCDate); |
| 76 | |
| 77 | /*! |
| 78 | * \internal |
| 79 | * Returns a string representation of a datetime. |
| 80 | * This is used internally to create debugging output. |
| 81 | * |
| 82 | * \a date the date to convert (presumably seconds since the epoch, as returned by the parse functions; TODO confirm) |
| 83 | * |
| 84 | * Returns a string representation of the date, or a null string if |
| 85 | * \a date is 0. |
| 86 | */ |
| 87 | SYNDICATION_EXPORT |
| 88 | QString dateTimeToString(uint date); |
| 89 | |
| 90 | /*! |
| 91 | * Resolves entities in a string to their respective Unicode characters and returns the resulting string. |
| 92 | * |
| 93 | * \a str the string whose entities are to be resolved |
| 94 | */ |
| 95 | SYNDICATION_EXPORT |
| 96 | QString resolveEntities(const QString &str); |
| 97 | /*! |
| 98 | * Replaces the special characters <, >, &, " and ' |
| 99 | * with their entity references (presumably &lt;, &gt;, &amp;, &quot; and &apos;; the entity names appear to have been lost from this comment, so confirm against the implementation). |
| 100 | * |
| 101 | * \a str the string to escape |
| 102 | */ |
| 103 | SYNDICATION_EXPORT |
| 104 | QString escapeSpecialCharacters(const QString &str); |
| 105 | |
| 106 | /*! |
| 107 | * Replaces newlines ("\n") with <br/> and returns the converted string. |
| 108 | * |
| 109 | * \a str the string to convert |
| 110 | */ |
| 111 | SYNDICATION_EXPORT |
| 112 | QString convertNewlines(const QString &str); |
| 113 | |
| 114 | /*! |
| 115 | * Converts a plain text string to HTML and returns the result. |
| 116 | * |
| 117 | * \a plainText a string in plain text |
| 118 | */ |
| 119 | SYNDICATION_EXPORT |
| 120 | QString plainTextToHtml(const QString &plainText); |
| 121 | |
| 122 | /*! |
| 123 | * Converts an HTML string to plain text. |
| 124 | * |
| 125 | * \a html a string in HTML format |
| 126 | * |
| 127 | * Returns the stripped text. |
| 128 | */ |
| 129 | SYNDICATION_EXPORT |
| 130 | QString htmlToPlainText(const QString &html); |
| 131 | |
| 132 | /*! |
| 133 | * Guesses whether a string contains plain text or HTML. |
| 134 | * |
| 135 | * \a str the string in unknown format |
| 136 | * |
| 137 | * Returns \c true if the heuristic thinks it is HTML, \c false |
| 138 | * if it thinks it is plain text. |
| 139 | */ |
| 140 | SYNDICATION_EXPORT |
| 141 | bool isHtml(const QString &str); |
| 142 | |
| 143 | /*! |
| 144 | * Guesses whether a string contains (HTML) markup or not. This |
| 145 | * is not an exact check for valid HTML markup, but a |
| 146 | * simple (and relatively fast) heuristic. |
| 147 | * |
| 148 | * \a str the string that might or might not contain markup |
| 149 | * |
| 150 | * Returns \c true if the heuristic thinks it contains markup, \c false |
| 151 | * if it thinks it is markup-free plain text. |
| 152 | */ |
| 153 | SYNDICATION_EXPORT |
| 154 | bool stringContainsMarkup(const QString &str); |
| 155 | |
| 156 | /*! |
| 157 | * Ensures HTML formatting for a string. |
| 158 | * |
| 159 | * Guesses via isHtml() whether \a str contains HTML or plain text, and returns |
| 160 | * plainTextToHtml(str) if it thinks it is plain text, or the unmodified \a str |
| 161 | * otherwise. |
| 162 | * |
| 163 | * \a str a string with unknown content |
| 164 | * Returns the string as HTML (as long as the heuristics work). |
| 165 | */ |
| 166 | SYNDICATION_EXPORT |
| 167 | QString normalize(const QString &str); |
| 168 | |
| 169 | /*! |
| 170 | * Normalizes a string based on feed-wide properties of tag content. |
| 171 | * It is based on the assumption that all items in a feed encode their |
| 172 | * title/description content in the same way (CDATA or not, plain text |
| 173 | * vs. HTML). \a isCDATA and \a containsMarkup are determined once by the feed, |
| 174 | * and then passed to this function. |
| 175 | * |
| 176 | * The returned string contains HTML, with the special characters <, >, |
| 177 | * &, ", and ' escaped, and all other entities resolved. |
| 178 | * Whitespace is collapsed, and relevant whitespace is replaced by the respective |
| 179 | * HTML tags (<br/>). |
| 180 | * |
| 181 | * \a str a string |
| 182 | * |
| 183 | * \a isCDATA whether the feed uses CDATA for the tag \a str was read from |
| 184 | * |
| 185 | * \a containsMarkup whether the feed uses HTML markup in the |
| 186 | * tag \a str was read from |
| 187 | * |
| 188 | * Returns the string as HTML (as long as the heuristics work). |
| 189 | */ |
| 190 | SYNDICATION_EXPORT |
| 191 | QString normalize(const QString &str, bool isCDATA, bool containsMarkup); |
| 192 | |
| 193 | /*! |
| 194 | * Parses a person object from a string by identifying the name and email address |
| 195 | * in the string. The currently detected variants are: |
| 196 | * "foo@bar.com", "Foo", "Foo <foo@bar.com>", "foo@bar.com (Foo)". |
| 197 | * |
| 198 | * \a str the string to parse the person from |
| 199 | * |
| 200 | * Returns a Person object (as a PersonPtr) containing the parsed information. |
| 201 | */ |
| 202 | SYNDICATION_EXPORT |
| 203 | PersonPtr personFromString(const QString &str); |
| 204 | |
| 205 | /*! |
| 206 | * \internal |
| 207 | * Calculates a hash value for the string \a str. Unlike the public helpers in this header, it is declared without SYNDICATION_EXPORT. |
| 208 | */ |
| 209 | unsigned int calcHash(const QString &str); |
| 210 | |
| 211 | /*! |
| 212 | * \internal |
| 213 | * Calculates a hash value for the byte array \a array. Unlike the public helpers in this header, it is declared without SYNDICATION_EXPORT. |
| 214 | */ |
| 215 | unsigned int calcHash(const QByteArray &array); |
| 216 | |
| 217 | /*! |
| 218 | * \internal |
| 219 | * Calculates an MD5 checksum for the string \a str and returns it as a QString. |
| 220 | */ |
| 221 | QString calcMD5Sum(const QString &str); |
| 222 | |
| 223 | /*! |
| 224 | * \internal |
| 225 | * Used internally to represent element types. Holds two strings: \c localname and \c ns (presumably the element's local name and its namespace; confirm against the implementation). |
| 226 | */ |
| 227 | struct ElementType { |
| 228 | ElementType(const QString &localnamep, |
| 229 | const QString &nsp = QString()); // implicit: not marked explicit, so a QString converts to an ElementType; nsp defaults to a null QString |
| 230 | |
| 231 | bool operator==(const ElementType &other) const; |
| 232 | |
| 233 | QString ns; |
| 234 | QString localname; |
| 235 | }; |
| 236 | |
| 237 | } // namespace Syndication |
| 238 | |
| 239 | #endif // SYNDICATION_TOOLS_H |
| 240 | |