1 | /* |
2 | This file is part of the syndication library |
3 | SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | #ifndef SYNDICATION_TOOLS_H |
9 | #define SYNDICATION_TOOLS_H |
10 | |
11 | #include "person.h" |
12 | #include "syndication_export.h" |
13 | |
14 | #include <QString> |
15 | |
16 | class QByteArray; |
17 | class QString; |
18 | |
19 | namespace Syndication |
20 | { |
/*!
 * Date formats supported by the date parsers declared in this header.
 * A value of this type is passed as the \c hint argument of parseDate().
 *
 * \value ISODate ISO 8601 extended format (date: "2003-12-13",
 *        datetime: "2003-12-13T18:30:02.25",
 *        datetime with timezone: "2003-12-13T18:30:02.25+01:00").
 * \value RFCDate RFC 822 format (e.g. "Sat, 07 Sep 2002 00:00:01 GMT").
 */
enum DateFormat {
    ISODate,
    RFCDate,
};
31 | |
/*!
 * Parses a date string in ISO 8601 extended format, for example
 * "2003-12-13" (date only), "2003-12-13T18:30:02.25" (date and time) or
 * "2003-12-13T18:30:02.25+01:00" (date and time with timezone offset).
 *
 * \a str a string in ISO 8601 format
 *
 * Returns the parsed date in seconds since the epoch, or 0 if no date could
 * be parsed from \a str. Because 0 is the failure value, a failed parse
 * cannot be distinguished from a result of exactly 0 by the return value
 * alone.
 */
SYNDICATION_EXPORT
uint parseISODate(const QString &str);
44 | |
/*!
 * Parses a date string in the format defined by RFC 822,
 * for example "Sat, 07 Sep 2002 00:00:01 GMT".
 *
 * \a str a string in RFC 822 format
 *
 * Returns the parsed date in seconds since the epoch, or 0 if no date could
 * be parsed from \a str. Because 0 is the failure value, a failed parse
 * cannot be distinguished from a result of exactly 0 by the return value
 * alone.
 */
SYNDICATION_EXPORT
uint parseRFCDate(const QString &str);
56 | |
/*!
 * Parses a date string that is either in ISO 8601 format (see
 * parseISODate()) or in RFC 822 format (see parseRFCDate()).
 *
 * Both parsers are tried, and the first valid parsing result is returned
 * (or 0 if neither parser succeeds).
 *
 * To speed up parsing, a hint can be given that names the expected format;
 * the corresponding parser is then tried first. If no hint is passed,
 * RFCDate is assumed.
 *
 * \a str a date string
 *
 * \a hint the expected format (defaults to RFCDate)
 *
 * Returns the parsed date in seconds since the epoch, or 0 if no date could
 * be parsed from \a str.
 */
SYNDICATION_EXPORT
uint parseDate(const QString &str, DateFormat hint = RFCDate);
76 | |
/*!
 * \internal
 * Returns a string representation of a datetime.
 * This is used internally to create debugging output.
 *
 * \a date the date to convert
 * (NOTE(review): presumably seconds since the epoch, matching what the
 * parse functions in this header return -- confirm against the
 * implementation)
 *
 * Returns the string representation of the date, or a null string if
 * \a date is 0.
 */
SYNDICATION_EXPORT
QString dateTimeToString(uint date);
89 | |
/*!
 * Resolves entities in a string to their respective Unicode characters.
 *
 * \a str the string whose entities should be resolved
 *
 * Returns the string with entities resolved.
 */
SYNDICATION_EXPORT
QString resolveEntities(const QString &str);
/*!
 * Replaces the special characters <, >, &, " and '
 * with the entity references &lt;, &gt;, &amp;, &quot; and &apos;
 * respectively.
 *
 * \a str the string to escape
 *
 * Returns the escaped string.
 */
SYNDICATION_EXPORT
QString escapeSpecialCharacters(const QString &str);
105 | |
/*!
 * Replaces each newline ("\n") in a string by the HTML tag <br/>.
 *
 * \a str the string to convert
 *
 * Returns the converted string.
 */
SYNDICATION_EXPORT
QString convertNewlines(const QString &str);
113 | |
/*!
 * Converts a plain text string to HTML.
 *
 * \a plainText a string in plain text
 *
 * Returns the HTML form of \a plainText.
 */
SYNDICATION_EXPORT
QString plainTextToHtml(const QString &plainText);
121 | |
/*!
 * Converts an HTML string to plain text.
 *
 * \a html a string in HTML format
 *
 * Returns the stripped text.
 */
SYNDICATION_EXPORT
QString htmlToPlainText(const QString &html);
131 | |
/*!
 * Guesses whether a string contains plain text or HTML.
 * The result is a heuristic, not a definitive classification.
 *
 * \a str the string in unknown format
 *
 * Returns \c true if the heuristic thinks \a str is HTML, \c false
 * if it thinks \a str is plain text.
 */
SYNDICATION_EXPORT
bool isHtml(const QString &str);
142 | |
/*!
 * Guesses whether a string contains (HTML) markup or not. This is not an
 * exact check for valid HTML markup, but a simple (and relatively fast)
 * heuristic.
 *
 * \a str the string that might or might not contain markup
 *
 * Returns \c true if the heuristic thinks \a str contains markup, \c false
 * if it thinks \a str is markup-free plain text.
 */
SYNDICATION_EXPORT
bool stringContainsMarkup(const QString &str);
155 | |
/*!
 * Ensures HTML formatting for a string.
 *
 * Guesses via isHtml() whether \a str contains HTML or plain text. If it is
 * judged to be plain text, plainTextToHtml(str) is returned; otherwise
 * \a str is returned unmodified.
 *
 * \a str a string with unknown content
 *
 * Returns the string as HTML (as long as the heuristics work).
 */
SYNDICATION_EXPORT
QString normalize(const QString &str);
168 | |
/*!
 * Normalizes a string based on feed-wide properties of tag content.
 *
 * This overload is based on the assumption that all items in a feed encode
 * their title/description content in the same way (CDATA or not, plain text
 * vs. HTML). \a isCDATA and \a containsMarkup are determined once by the
 * feed and then passed to this function for every string.
 *
 * The returned string contains HTML, with the special characters <, >,
 * &, ", and ' escaped, and all other entities resolved.
 * Whitespace is collapsed; relevant whitespace is replaced by the respective
 * HTML tags (<br/>).
 *
 * \a str a string
 *
 * \a isCDATA whether the feed uses CDATA for the tag \a str was read from
 *
 * \a containsMarkup whether the feed uses HTML markup in the
 * tag \a str was read from
 *
 * Returns the string as HTML (as long as the heuristics work).
 */
SYNDICATION_EXPORT
QString normalize(const QString &str, bool isCDATA, bool containsMarkup);
192 | |
/*!
 * Parses a person object from a string by identifying the name and the
 * email address in the string. The variants currently detected are:
 * "foo@bar.com", "Foo", "Foo <foo@bar.com>" and "foo@bar.com (Foo)".
 *
 * \a str the string to parse the person from
 *
 * Returns a Person object containing the parsed information.
 */
SYNDICATION_EXPORT
PersonPtr personFromString(const QString &str);
204 | |
/*!
 * \internal
 * Calculates a hash value for a string.
 *
 * Unlike the public helpers in this header, this function is declared
 * without SYNDICATION_EXPORT.
 *
 * \a str the string to hash
 *
 * Returns the hash value.
 */
unsigned int calcHash(const QString &str);
210 | |
/*!
 * \internal
 * Calculates a hash value for a byte array.
 *
 * Unlike the public helpers in this header, this function is declared
 * without SYNDICATION_EXPORT.
 *
 * \a array the byte array to hash
 *
 * Returns the hash value.
 */
unsigned int calcHash(const QByteArray &array);
216 | |
/*!
 * \internal
 * Calculates an MD5 checksum for a string.
 *
 * Unlike the public helpers in this header, this function is declared
 * without SYNDICATION_EXPORT.
 *
 * \a str the string to compute the checksum for
 *
 * Returns the checksum as a string.
 * (NOTE(review): the textual encoding of the checksum, e.g. hexadecimal,
 * is not visible from this declaration -- confirm against the
 * implementation)
 */
QString calcMD5Sum(const QString &str);
222 | |
/*!
 * \internal
 * Used internally to represent element types. An element type is described
 * by a namespace (ns) and a local name (localname).
 */
struct ElementType {
    // Deliberately not marked explicit, so that a plain QString (the local
    // name) converts implicitly to an ElementType; the namespace defaults to
    // a null QString.
    // NOTE(review): localnamep/nsp presumably initialize the localname/ns
    // members -- the definition is not visible in this header.
    ElementType(const QString &localnamep,
                const QString &nsp = QString()); // implicit

    // Equality comparison with another element type.
    // NOTE(review): presumably compares both ns and localname -- confirm
    // against the definition.
    bool operator==(const ElementType &other) const;

    // Namespace part of the element type.
    QString ns;
    // Local name part of the element type.
    QString localname;
};
236 | |
237 | } // namespace Syndication |
238 | |
239 | #endif // SYNDICATION_TOOLS_H |
240 | |