1 | /* |
2 | Implementation of the data protocol (rfc 2397) |
3 | |
4 | SPDX-FileCopyrightText: 2002, 2003 Leo Savernik <l.savernik@aon.at> |
5 | |
6 | SPDX-License-Identifier: LGPL-2.0-only |
7 | */ |
8 | |
9 | #include "dataprotocol_p.h" |
10 | |
11 | #include "global.h" |
12 | #include "metadata.h" |
13 | |
14 | #include <QByteArray> |
15 | #include <QStringDecoder> |
16 | |
17 | using namespace KIO; |
18 | |
19 | /** structure containing header information */ |
20 | struct { |
21 | QString ; // MIME type of content (lowercase) |
22 | MetaData ; // attribute/value pairs (attribute lowercase, |
23 | // value unchanged) |
24 | bool ; // true if data is base64 encoded |
25 | QByteArray ; // reference to decoded url |
26 | int ; // zero-indexed position within url |
27 | // where the real data begins. May point beyond |
28 | // the end to indicate that there is no data |
29 | }; |
30 | |
31 | /** returns the position of the first occurrence of any of the given |
32 | * characters @p c1 or comma (',') or semicolon (';') or buf.length() |
33 | * if none is contained. |
34 | * |
35 | * @param buf buffer where to look for c |
36 | * @param begin zero-indexed starting position |
37 | * @param c1 character to find or '\0' to ignore |
38 | */ |
39 | static int find(const QByteArray &buf, int begin, const char c1) |
40 | { |
41 | static const char comma = ','; |
42 | static const char semicolon = ';'; |
43 | int pos = begin; |
44 | int size = buf.length(); |
45 | while (pos < size) { |
46 | const char ch = buf[pos]; |
47 | if (ch == comma || ch == semicolon || (c1 != '\0' && ch == c1)) { |
48 | break; |
49 | } |
50 | pos++; |
51 | } /*wend*/ |
52 | return pos; |
53 | } |
54 | |
55 | /** extracts the string between the current position @p pos and the first |
56 | * occurrence of either @p c1 or comma (',') or semicolon (';') exclusively |
57 | * and updates @p pos to point at the found delimiter or at the end of the |
58 | * buffer if neither character occurred. |
59 | * @param buf buffer where to look for |
60 | * @param pos zero-indexed position within buffer |
61 | * @param c1 character to find or '\0' to ignore |
62 | */ |
63 | static inline QString (const QByteArray &buf, int &pos, const char c1 = '\0') |
64 | { |
65 | int oldpos = pos; |
66 | pos = find(buf, begin: oldpos, c1); |
67 | return QString::fromLatin1(ba: buf.mid(index: oldpos, len: pos - oldpos)); |
68 | } |
69 | |
70 | /** ignores all whitespaces |
71 | * @param buf buffer to operate on |
72 | * @param pos position to shift to first non-whitespace character |
73 | * Upon return @p pos will either point to the first non-whitespace |
74 | * character or to the end of the buffer. |
75 | */ |
76 | static inline void ignoreWS(const QByteArray &buf, int &pos) |
77 | { |
78 | int size = buf.length(); |
79 | while (pos < size && (buf[pos] == ' ' || buf[pos] == '\t')) { |
80 | ++pos; |
81 | } |
82 | } |
83 | |
84 | /** parses a quoted string as per rfc 822. |
85 | * |
86 | * If trailing quote is missing, the whole rest of the buffer is returned. |
87 | * @param buf buffer to operate on |
88 | * @param pos position pointing to the leading quote |
89 | * @return the extracted string. @p pos will be updated to point to the |
90 | * character following the trailing quote. |
91 | */ |
92 | static QString parseQuotedString(const QByteArray &buf, int &pos) |
93 | { |
94 | int size = buf.length(); |
95 | QString res; |
96 | res.reserve(asize: size); // can't be larger than buf |
97 | pos++; // jump over leading quote |
98 | bool escaped = false; // if true means next character is literal |
99 | bool parsing = true; // true as long as end quote not found |
100 | while (parsing && pos < size) { |
101 | const QChar ch = QLatin1Char(buf[pos++]); |
102 | if (escaped) { |
103 | res += ch; |
104 | escaped = false; |
105 | } else { |
106 | switch (ch.unicode()) { |
107 | case '"': |
108 | parsing = false; |
109 | break; |
110 | case '\\': |
111 | escaped = true; |
112 | break; |
113 | default: |
114 | res += ch; |
115 | break; |
116 | } /*end switch*/ |
117 | } /*end if*/ |
118 | } /*wend*/ |
119 | res.squeeze(); |
120 | return res; |
121 | } |
122 | |
123 | /** parses the header of a data url |
124 | * @param url the data url |
125 | * @param mimeOnly if the only interesting information is the MIME type |
126 | * @return DataHeader structure with the header information |
127 | */ |
128 | static DataHeader (const QUrl &url, const bool mimeOnly) |
129 | { |
130 | DataHeader ; |
131 | |
132 | // initialize header info members |
133 | header_info.mime_type = QStringLiteral("text/plain" ); |
134 | header_info.attributes.insert(QStringLiteral("charset" ), QStringLiteral("us-ascii" )); |
135 | header_info.is_base64 = false; |
136 | |
137 | // decode url and save it |
138 | const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(pctEncoded: url.path(options: QUrl::FullyEncoded).toLatin1()); |
139 | const int raw_url_len = raw_url.length(); |
140 | |
141 | header_info.data_offset = 0; |
142 | |
143 | // read MIME type |
144 | if (raw_url_len == 0) { |
145 | return header_info; |
146 | } |
147 | const QString mime_type = extract(buf: raw_url, pos&: header_info.data_offset).trimmed(); |
148 | if (!mime_type.isEmpty()) { |
149 | header_info.mime_type = mime_type; |
150 | } |
151 | if (mimeOnly) { |
152 | return header_info; |
153 | } |
154 | |
155 | if (header_info.data_offset >= raw_url_len) { |
156 | return header_info; |
157 | } |
158 | // jump over delimiter token and return if data reached |
159 | if (raw_url[header_info.data_offset++] == ',') { |
160 | return header_info; |
161 | } |
162 | |
163 | // read all attributes and store them |
164 | bool data_begin_reached = false; |
165 | while (!data_begin_reached && header_info.data_offset < raw_url_len) { |
166 | // read attribute |
167 | const QString attribute = extract(buf: raw_url, pos&: header_info.data_offset, c1: '=').trimmed(); |
168 | if (header_info.data_offset >= raw_url_len || raw_url[header_info.data_offset] != '=') { |
169 | // no assignment, must be base64 option |
170 | if (attribute == QLatin1String("base64" )) { |
171 | header_info.is_base64 = true; |
172 | } |
173 | } else { |
174 | header_info.data_offset++; // jump over '=' token |
175 | |
176 | // read value |
177 | ignoreWS(buf: raw_url, pos&: header_info.data_offset); |
178 | if (header_info.data_offset >= raw_url_len) { |
179 | return header_info; |
180 | } |
181 | |
182 | QString value; |
183 | if (raw_url[header_info.data_offset] == '"') { |
184 | value = parseQuotedString(buf: raw_url, pos&: header_info.data_offset); |
185 | ignoreWS(buf: raw_url, pos&: header_info.data_offset); |
186 | } else { |
187 | value = extract(buf: raw_url, pos&: header_info.data_offset).trimmed(); |
188 | } |
189 | |
190 | // add attribute to map |
191 | header_info.attributes[attribute.toLower()] = value; |
192 | |
193 | } /*end if*/ |
194 | if (header_info.data_offset < raw_url_len && raw_url[header_info.data_offset] == ',') { |
195 | data_begin_reached = true; |
196 | } |
197 | header_info.data_offset++; // jump over separator token |
198 | } /*wend*/ |
199 | |
200 | return header_info; |
201 | } |
202 | |
203 | DataProtocol::DataProtocol() |
204 | { |
205 | } |
206 | |
207 | DataProtocol::~DataProtocol() = default; |
208 | |
209 | void DataProtocol::get(const QUrl &url) |
210 | { |
211 | ref(); |
212 | // qDebug() << this; |
213 | |
214 | const DataHeader hdr = parseDataHeader(url, mimeOnly: false); |
215 | |
216 | const int size = hdr.url.length(); |
217 | const int data_ofs = qMin(a: hdr.data_offset, b: size); |
218 | // FIXME: string is copied, would be nice if we could have a reference only |
219 | const QByteArray url_data = hdr.url.mid(index: data_ofs); |
220 | QByteArray outData; |
221 | |
222 | if (hdr.is_base64) { |
223 | // base64 stuff is expected to contain the correct charset, so we just |
224 | // decode it and pass it to the receiver |
225 | outData = QByteArray::fromBase64(base64: url_data); |
226 | } else { |
227 | QStringDecoder codec(hdr.attributes[QStringLiteral("charset" )].toLatin1().constData()); |
228 | if (codec.isValid()) { |
229 | outData = QString(codec.decode(ba: url_data)).toUtf8(); |
230 | } else { |
231 | outData = url_data; |
232 | } /*end if*/ |
233 | } /*end if*/ |
234 | |
235 | // qDebug() << "emit mimeType@"<<this; |
236 | Q_EMIT mimeType(hdr.mime_type); |
237 | // qDebug() << "emit totalSize@"<<this; |
238 | Q_EMIT totalSize(outData.size()); |
239 | |
240 | // qDebug() << "emit setMetaData@"<<this; |
241 | setAllMetaData(hdr.attributes); |
242 | |
243 | // qDebug() << "emit sendMetaData@"<<this; |
244 | sendMetaData(); |
245 | // qDebug() << "(1) queue size " << dispatchQueue.size(); |
246 | // empiric studies have shown that this shouldn't be queued & dispatched |
247 | Q_EMIT data(outData); |
248 | // qDebug() << "(2) queue size " << dispatchQueue.size(); |
249 | dispatch_data(ba: QByteArray{}); |
250 | // qDebug() << "(3) queue size " << dispatchQueue.size(); |
251 | dispatch_finished(); |
252 | // qDebug() << "(4) queue size " << dispatchQueue.size(); |
253 | deref(); |
254 | } |
255 | |
256 | /* --------------------------------------------------------------------- */ |
257 | |
258 | void DataProtocol::mimetype(const QUrl &url) |
259 | { |
260 | ref(); |
261 | Q_EMIT mimeType(parseDataHeader(url, mimeOnly: true).mime_type); |
262 | Q_EMIT finished(); |
263 | deref(); |
264 | } |
265 | |
266 | /* --------------------------------------------------------------------- */ |
267 | |
268 | #if !defined(TESTKIO) |
269 | #include "moc_dataprotocol_p.cpp" |
270 | #endif |
271 | |