/*
    This file is part of the syndication library
    SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
7
8#ifndef SYNDICATION_TOOLS_H
9#define SYNDICATION_TOOLS_H
10
11#include "person.h"
12#include "syndication_export.h"
13
14#include <QString>
15
16class QByteArray;
17class QString;
18
19namespace Syndication
20{
/*!
 * Date formats supported by the date parsers.
 *
 * \value ISODate ISO 8601 extended format (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25", datetime with timezone: "2003-12-13T18:30:02.25+01:00").
 * \value RFCDate RFC 822 (e.g. "Sat, 07 Sep 2002 00:00:01 GMT").
 */
enum DateFormat {
    ISODate,
    RFCDate,
};
31
32/*!
33 * parses a date string in ISO 8601 extended format.
34 * (date: "2003-12-13",datetime: "2003-12-13T18:30:02.25",
35 * datetime with timezone: "2003-12-13T18:30:02.25+01:00")
36 *
37 * \a str a string in ISO 8601 format
38 *
39 * Returns parsed date in seconds since epoch, 0 if no date could
40 * be parsed from the string.
41 */
42SYNDICATION_EXPORT
43uint parseISODate(const QString &str);
44
45/*!
46 * parses a date string as defined in RFC 822.
47 * (Sat, 07 Sep 2002 00:00:01 GMT)
48 *
49 * \a str a string in RFC 822 format
50 *
51 * Returns parsed date in seconds since epoch, 0 if no date could
52 * be parsed from the string.
53 */
54SYNDICATION_EXPORT
55uint parseRFCDate(const QString &str);
56
57/*!
58 * parses a date string in ISO (see parseISODate()) or RFC 822 (see
59 * parseRFCDate()) format.
60 *
61 * It tries both parsers and returns the first valid parsing result found (or 0
62 * otherwise).
63 *
64 * To speed up parsing, you can give a hint which format you expect.
65 * The method will try the corresponding parser first then.
66 *
67 * \a str a date string
68 *
69 * \a hint the expected format
70 *
71 * Returns parsed date in seconds since epoch, 0 if no date could
72 * be parsed from the string.
73 */
74SYNDICATION_EXPORT
75uint parseDate(const QString &str, DateFormat hint = RFCDate);
76
77/*!
78 * \internal
79 * returns a string representation of a datetime.
80 * this is used internally to create debugging output.
81 *
82 * \a date the date to convert
83 *
84 * Returns string representation of the date, or a null string if
85 * \a date is 0
86 */
87SYNDICATION_EXPORT
88QString dateTimeToString(uint date);
89
90/*!
91 * resolves entities to respective unicode chars.
92 *
93 * \a str a string
94 */
95SYNDICATION_EXPORT
96QString resolveEntities(const QString &str);
97/*!
98 * replaces the characters <, >, &, ", '
99 * with &lt; &gt; &amp;, &quot; &apos;.
100 *
101 * \a str the string to escape
102 */
103SYNDICATION_EXPORT
104QString escapeSpecialCharacters(const QString &str);
105
106/*!
107 * replaces newlines ("\n") by <br/>
108 *
109 * \a str string to convert
110 */
111SYNDICATION_EXPORT
112QString convertNewlines(const QString &str);
113
114/*!
115 * converts a plain text string to HTML
116 *
117 * \a plainText a string in plain text.
118 */
119SYNDICATION_EXPORT
120QString plainTextToHtml(const QString &plainText);
121
122/*!
123 * converts a HTML string to plain text
124 *
125 * \a html string in HTML format
126 *
127 * Returns stripped text
128 */
129SYNDICATION_EXPORT
130QString htmlToPlainText(const QString &html);
131
132/*!
133 * guesses whether a string contains plain text or HTML
134 *
135 * \a str the string in unknown format
136 *
137 * Returns \c true if the heuristic thinks it's HTML, \c false
138 * if thinks it is plain text
139 */
140SYNDICATION_EXPORT
141bool isHtml(const QString &str);
142
143/*!
144 * guesses whether a string contains (HTML) markup or not. This
145 * implements not an exact check for valid HTML markup, but a
146 * simple (and relatively fast) heuristic.
147 *
148 * \a str the string that might or might not contain markup
149 *
150 * Returns \c true if the heuristic thinks it contains markup, \c false
151 * if thinks it is markup-free plain text
152 */
153SYNDICATION_EXPORT
154bool stringContainsMarkup(const QString &str);
155
156/*!
157 * Ensures HTML formatting for a string.
158 *
159 * guesses via isHtml() if \a str contains HTML or plain text, and returns
160 * plainTextToHtml(str) if it thinks it is plain text, or the unmodified \a str
161 * otherwise.
162 *
163 * \a str a string with unknown content
164 * Returns string as HTML (as long as the heuristics work)
165 */
166SYNDICATION_EXPORT
167QString normalize(const QString &str);
168
169/*!
170 * normalizes a string based on feed-wide properties of tag content.
171 * It is based on the assumption that all items in a feed encode their
172 * title/description content in the same way (CDATA or not, plain text
173 * vs. HTML). isCDATA and containsMarkup are determined once by the feed,
174 * and then passed to this method.
175 *
176 * The returned string contains HTML, with special characters <, >,
177 * &, ", and ' escaped, and all other entities resolved.
178 * Whitespace is collapsed, relevant whitespace is replaced by respective
179 * HTML tags (<br/>).
180 *
181 * \a str a string
182 *
183 * \a isCDATA whether the feed uses CDATA for the tag \a str was read from
184 *
185 * \a containsMarkup whether the feed uses HTML markup in the
186 * tag \a str was read from.
187 *
188 * Returns string as HTML (as long as the heuristics work)
189 */
190SYNDICATION_EXPORT
191QString normalize(const QString &str, bool isCDATA, bool containsMarkup);
192
193/*!
194 * Parses a person object from a string by identifying name and email address
195 * in the string. Currently detected variants are:
196 * "foo@bar.com", "Foo", "Foo <foo@bar.com>", "foo@bar.com (Foo)".
197 *
198 * \a str the string to parse the person from.
199 *
200 * Returns a Person object containing the parsed information.
201 */
202SYNDICATION_EXPORT
203PersonPtr personFromString(const QString &str);
204
205/*!
206 * \internal
207 * calculates a hash value for a string
208 */
209unsigned int calcHash(const QString &str);
210
211/*!
212 * \internal
213 * calculates a hash value for a byte array
214 */
215unsigned int calcHash(const QByteArray &array);
216
217/*!
218 * \internal
219 * calculates a md5 checksum for a string
220 */
221QString calcMD5Sum(const QString &str);
222
223/*!
224 * \internal
225 * used internally to represent element types
226 */
227struct ElementType {
228 ElementType(const QString &localnamep,
229 const QString &nsp = QString()); // implicit
230
231 bool operator==(const ElementType &other) const;
232
233 QString ns;
234 QString localname;
235};
236
237} // namespace Syndication
238
239#endif // SYNDICATION_TOOLS_H
240

// source code of syndication/src/tools.h