1 | /* |
2 | This file is part of the syndication library |
3 | SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | #ifndef SYNDICATION_TOOLS_H |
9 | #define SYNDICATION_TOOLS_H |
10 | |
11 | #include "person.h" |
12 | #include "syndication_export.h" |
13 | |
14 | #include <QString> |
15 | |
16 | class QByteArray; |
17 | class QString; |
18 | |
19 | namespace Syndication |
20 | { |
/**
 * Date formats understood by the date parsers.
 */
enum DateFormat {
    /**
     * ISO 8601 extended format.
     * Date only: "2003-12-13"; date and time: "2003-12-13T18:30:02.25";
     * date and time with timezone: "2003-12-13T18:30:02.25+01:00".
     */
    ISODate,
    /** RFC 822 format, e.g. "Sat, 07 Sep 2002 00:00:01 GMT". */
    RFCDate,
};
30 | |
31 | /** |
32 | * parses a date string in ISO 8601 extended format. |
33 | * (date: "2003-12-13",datetime: "2003-12-13T18:30:02.25", |
34 | * datetime with timezone: "2003-12-13T18:30:02.25+01:00") |
35 | * |
36 | * @param str a string in ISO 8601 format |
37 | * @return parsed date in seconds since epoch, 0 if no date could |
38 | * be parsed from the string. |
39 | */ |
40 | SYNDICATION_EXPORT |
41 | uint parseISODate(const QString &str); |
42 | |
43 | /** |
44 | * parses a date string as defined in RFC 822. |
45 | * (Sat, 07 Sep 2002 00:00:01 GMT) |
46 | * |
47 | * @param str a string in RFC 822 format |
48 | * @return parsed date in seconds since epoch, 0 if no date could |
49 | * be parsed from the string. |
50 | */ |
51 | SYNDICATION_EXPORT |
52 | uint parseRFCDate(const QString &str); |
53 | |
54 | /** |
55 | * parses a date string in ISO (see parseISODate()) or RFC 822 (see |
56 | * parseRFCDate()) format. |
57 | * It tries both parsers and returns the first valid parsing result found (or 0 |
58 | * otherwise). |
59 | * To speed up parsing, you can give a hint which format you expect. |
60 | * The method will try the corresponding parser first then. |
61 | * |
62 | * @param str a date string |
63 | * @param hint the expected format |
64 | * @return parsed date in seconds since epoch, 0 if no date could |
65 | * be parsed from the string. |
66 | */ |
67 | SYNDICATION_EXPORT |
68 | uint parseDate(const QString &str, DateFormat hint = RFCDate); |
69 | |
70 | /** |
71 | * @internal |
72 | * returns a string representation of a datetime. |
73 | * this is used internally to create debugging output. |
74 | * |
75 | * @param date the date to convert |
76 | * @return string representation of the date, or a null string if |
77 | * @c date is 0 |
78 | */ |
79 | SYNDICATION_EXPORT |
80 | QString dateTimeToString(uint date); |
81 | |
82 | /** |
83 | * resolves entities to respective unicode chars. |
84 | * |
85 | * @param str a string |
86 | */ |
87 | SYNDICATION_EXPORT |
88 | QString resolveEntities(const QString &str); |
89 | /** |
90 | * replaces the characters < >, &, ", ' |
91 | * with &lt; &gt; &amp;, &quot; &apos;. |
92 | * @param str the string to escape |
93 | */ |
94 | SYNDICATION_EXPORT |
95 | QString escapeSpecialCharacters(const QString &str); |
96 | |
97 | /** |
98 | * replaces newlines ("\n") by <br/> |
99 | * @param str string to convert |
100 | */ |
101 | SYNDICATION_EXPORT |
102 | QString convertNewlines(const QString &str); |
103 | |
104 | /** |
105 | * converts a plain text string to HTML |
106 | * |
107 | * @param plainText a string in plain text. |
108 | */ |
109 | SYNDICATION_EXPORT |
110 | QString plainTextToHtml(const QString &plainText); |
111 | |
112 | /** |
113 | * converts a HTML string to plain text |
114 | * |
115 | * @param html string in HTML format |
116 | * @return stripped text |
117 | */ |
118 | SYNDICATION_EXPORT |
119 | QString htmlToPlainText(const QString &html); |
120 | |
121 | /** |
122 | * guesses whether a string contains plain text or HTML |
123 | * |
124 | * @param str the string in unknown format |
125 | * @return @c true if the heuristic thinks it's HTML, @c false |
126 | * if thinks it is plain text |
127 | */ |
128 | SYNDICATION_EXPORT |
129 | bool isHtml(const QString &str); |
130 | |
131 | /** |
132 | * guesses whether a string contains (HTML) markup or not. This |
133 | * implements not an exact check for valid HTML markup, but a |
134 | * simple (and relatively fast) heuristic. |
135 | * |
136 | * @param str the string that might or might not contain markup |
137 | * @return @c true if the heuristic thinks it contains markup, @c false |
138 | * if thinks it is markup-free plain text |
139 | */ |
140 | SYNDICATION_EXPORT |
141 | bool stringContainsMarkup(const QString &str); |
142 | |
143 | /** |
144 | * Ensures HTML formatting for a string. |
145 | * guesses via isHtml() if @c str contains HTML or plain text, and returns |
146 | * plainTextToHtml(str) if it thinks it is plain text, or the unmodified @c str |
147 | * otherwise. |
148 | * |
149 | * @param str a string with unknown content |
150 | * @return string as HTML (as long as the heuristics work) |
151 | */ |
152 | SYNDICATION_EXPORT |
153 | QString normalize(const QString &str); |
154 | |
155 | /** |
156 | * normalizes a string based on feed-wide properties of tag content. |
157 | * It is based on the assumption that all items in a feed encode their |
158 | * title/description content in the same way (CDATA or not, plain text |
159 | * vs. HTML). isCDATA and containsMarkup are determined once by the feed, |
160 | * and then passed to this method. |
161 | * |
162 | * The returned string contains HTML, with special characters <, >, |
163 | * &, ", and ' escaped, and all other entities resolved. |
164 | * Whitespace is collapsed, relevant whitespace is replaced by respective |
165 | * HTML tags (<br/>). |
166 | * |
167 | * @param str a string |
168 | * @param isCDATA whether the feed uses CDATA for the tag @c str was read from |
169 | * @param containsMarkup whether the feed uses HTML markup in the |
170 | * tag @c str was read from. |
171 | * @return string as HTML (as long as the heuristics work) |
172 | */ |
173 | SYNDICATION_EXPORT |
174 | QString normalize(const QString &str, bool isCDATA, bool containsMarkup); |
175 | |
176 | /** |
177 | * Parses a person object from a string by identifying name and email address |
178 | * in the string. Currently detected variants are: |
179 | * "foo@bar.com", "Foo", "Foo <foo@bar.com>", "foo@bar.com (Foo)". |
180 | * |
181 | * @param str the string to parse the person from. |
182 | * @return a Person object containing the parsed information. |
183 | */ |
184 | SYNDICATION_EXPORT |
185 | PersonPtr personFromString(const QString &str); |
186 | |
187 | /** |
188 | * @internal |
189 | * calculates a hash value for a string |
190 | */ |
191 | unsigned int calcHash(const QString &str); |
192 | |
193 | /** |
194 | * @internal |
195 | * calculates a hash value for a byte array |
196 | */ |
197 | unsigned int calcHash(const QByteArray &array); |
198 | |
199 | /** |
200 | * @internal |
201 | * calculates a md5 checksum for a string |
202 | */ |
203 | QString calcMD5Sum(const QString &str); |
204 | |
205 | //@cond PRIVATE |
206 | /** |
207 | * @internal |
208 | * used internally to represent element types |
209 | */ |
210 | struct ElementType { |
211 | ElementType(const QString &localnamep, |
212 | const QString &nsp = QString()); // implicit |
213 | |
214 | bool operator==(const ElementType &other) const; |
215 | |
216 | QString ns; |
217 | QString localname; |
218 | }; |
219 | //@endcond |
220 | |
221 | } // namespace Syndication |
222 | |
223 | #endif // SYNDICATION_TOOLS_H |
224 | |