1/*
2 Implementation of the data protocol (rfc 2397)
3
4 SPDX-FileCopyrightText: 2002, 2003 Leo Savernik <l.savernik@aon.at>
5
6 SPDX-License-Identifier: LGPL-2.0-only
7*/
8
9#include "dataprotocol_p.h"
10
11#include "global.h"
12#include "metadata.h"
13
14#include <QByteArray>
15#include <QStringDecoder>
16
17using namespace KIO;
18
19/** structure containing header information */
20struct DataHeader {
21 QString mime_type; // MIME type of content (lowercase)
22 MetaData attributes; // attribute/value pairs (attribute lowercase,
23 // value unchanged)
24 bool is_base64; // true if data is base64 encoded
25 QByteArray url; // reference to decoded url
26 int data_offset; // zero-indexed position within url
27 // where the real data begins. May point beyond
28 // the end to indicate that there is no data
29};
30
31/** returns the position of the first occurrence of any of the given
32 * characters @p c1 or comma (',') or semicolon (';') or buf.length()
33 * if none is contained.
34 *
35 * @param buf buffer where to look for c
36 * @param begin zero-indexed starting position
37 * @param c1 character to find or '\0' to ignore
38 */
39static int find(const QByteArray &buf, int begin, const char c1)
40{
41 static const char comma = ',';
42 static const char semicolon = ';';
43 int pos = begin;
44 int size = buf.length();
45 while (pos < size) {
46 const char ch = buf[pos];
47 if (ch == comma || ch == semicolon || (c1 != '\0' && ch == c1)) {
48 break;
49 }
50 pos++;
51 } /*wend*/
52 return pos;
53}
54
55/** extracts the string between the current position @p pos and the first
56 * occurrence of either @p c1 or comma (',') or semicolon (';') exclusively
57 * and updates @p pos to point at the found delimiter or at the end of the
58 * buffer if neither character occurred.
59 * @param buf buffer where to look for
60 * @param pos zero-indexed position within buffer
61 * @param c1 character to find or '\0' to ignore
62 */
63static inline QString extract(const QByteArray &buf, int &pos, const char c1 = '\0')
64{
65 int oldpos = pos;
66 pos = find(buf, begin: oldpos, c1);
67 return QString::fromLatin1(ba: buf.mid(index: oldpos, len: pos - oldpos));
68}
69
70/** ignores all whitespaces
71 * @param buf buffer to operate on
72 * @param pos position to shift to first non-whitespace character
73 * Upon return @p pos will either point to the first non-whitespace
74 * character or to the end of the buffer.
75 */
76static inline void ignoreWS(const QByteArray &buf, int &pos)
77{
78 int size = buf.length();
79 while (pos < size && (buf[pos] == ' ' || buf[pos] == '\t')) {
80 ++pos;
81 }
82}
83
84/** parses a quoted string as per rfc 822.
85 *
86 * If trailing quote is missing, the whole rest of the buffer is returned.
87 * @param buf buffer to operate on
88 * @param pos position pointing to the leading quote
89 * @return the extracted string. @p pos will be updated to point to the
90 * character following the trailing quote.
91 */
92static QString parseQuotedString(const QByteArray &buf, int &pos)
93{
94 int size = buf.length();
95 QString res;
96 res.reserve(asize: size); // can't be larger than buf
97 pos++; // jump over leading quote
98 bool escaped = false; // if true means next character is literal
99 bool parsing = true; // true as long as end quote not found
100 while (parsing && pos < size) {
101 const QChar ch = QLatin1Char(buf[pos++]);
102 if (escaped) {
103 res += ch;
104 escaped = false;
105 } else {
106 switch (ch.unicode()) {
107 case '"':
108 parsing = false;
109 break;
110 case '\\':
111 escaped = true;
112 break;
113 default:
114 res += ch;
115 break;
116 } /*end switch*/
117 } /*end if*/
118 } /*wend*/
119 res.squeeze();
120 return res;
121}
122
123/** parses the header of a data url
124 * @param url the data url
125 * @param mimeOnly if the only interesting information is the MIME type
126 * @return DataHeader structure with the header information
127 */
128static DataHeader parseDataHeader(const QUrl &url, const bool mimeOnly)
129{
130 DataHeader header_info;
131
132 // initialize header info members
133 header_info.mime_type = QStringLiteral("text/plain");
134 header_info.attributes.insert(QStringLiteral("charset"), QStringLiteral("us-ascii"));
135 header_info.is_base64 = false;
136
137 // decode url and save it
138 const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(pctEncoded: url.path(options: QUrl::FullyEncoded).toLatin1());
139 const int raw_url_len = raw_url.length();
140
141 header_info.data_offset = 0;
142
143 // read MIME type
144 if (raw_url_len == 0) {
145 return header_info;
146 }
147 const QString mime_type = extract(buf: raw_url, pos&: header_info.data_offset).trimmed();
148 if (!mime_type.isEmpty()) {
149 header_info.mime_type = mime_type;
150 }
151 if (mimeOnly) {
152 return header_info;
153 }
154
155 if (header_info.data_offset >= raw_url_len) {
156 return header_info;
157 }
158 // jump over delimiter token and return if data reached
159 if (raw_url[header_info.data_offset++] == ',') {
160 return header_info;
161 }
162
163 // read all attributes and store them
164 bool data_begin_reached = false;
165 while (!data_begin_reached && header_info.data_offset < raw_url_len) {
166 // read attribute
167 const QString attribute = extract(buf: raw_url, pos&: header_info.data_offset, c1: '=').trimmed();
168 if (header_info.data_offset >= raw_url_len || raw_url[header_info.data_offset] != '=') {
169 // no assignment, must be base64 option
170 if (attribute == QLatin1String("base64")) {
171 header_info.is_base64 = true;
172 }
173 } else {
174 header_info.data_offset++; // jump over '=' token
175
176 // read value
177 ignoreWS(buf: raw_url, pos&: header_info.data_offset);
178 if (header_info.data_offset >= raw_url_len) {
179 return header_info;
180 }
181
182 QString value;
183 if (raw_url[header_info.data_offset] == '"') {
184 value = parseQuotedString(buf: raw_url, pos&: header_info.data_offset);
185 ignoreWS(buf: raw_url, pos&: header_info.data_offset);
186 } else {
187 value = extract(buf: raw_url, pos&: header_info.data_offset).trimmed();
188 }
189
190 // add attribute to map
191 header_info.attributes[attribute.toLower()] = value;
192
193 } /*end if*/
194 if (header_info.data_offset < raw_url_len && raw_url[header_info.data_offset] == ',') {
195 data_begin_reached = true;
196 }
197 header_info.data_offset++; // jump over separator token
198 } /*wend*/
199
200 return header_info;
201}
202
203DataProtocol::DataProtocol()
204{
205}
206
207DataProtocol::~DataProtocol() = default;
208
209void DataProtocol::get(const QUrl &url)
210{
211 ref();
212 // qDebug() << this;
213
214 const DataHeader hdr = parseDataHeader(url, mimeOnly: false);
215
216 const int size = hdr.url.length();
217 const int data_ofs = qMin(a: hdr.data_offset, b: size);
218 // FIXME: string is copied, would be nice if we could have a reference only
219 const QByteArray url_data = hdr.url.mid(index: data_ofs);
220 QByteArray outData;
221
222 if (hdr.is_base64) {
223 // base64 stuff is expected to contain the correct charset, so we just
224 // decode it and pass it to the receiver
225 outData = QByteArray::fromBase64(base64: url_data);
226 } else {
227 QStringDecoder codec(hdr.attributes[QStringLiteral("charset")].toLatin1().constData());
228 if (codec.isValid()) {
229 outData = QString(codec.decode(ba: url_data)).toUtf8();
230 } else {
231 outData = url_data;
232 } /*end if*/
233 } /*end if*/
234
235 // qDebug() << "emit mimeType@"<<this;
236 Q_EMIT mimeType(hdr.mime_type);
237 // qDebug() << "emit totalSize@"<<this;
238 Q_EMIT totalSize(outData.size());
239
240 // qDebug() << "emit setMetaData@"<<this;
241 setAllMetaData(hdr.attributes);
242
243 // qDebug() << "emit sendMetaData@"<<this;
244 sendMetaData();
245 // qDebug() << "(1) queue size " << dispatchQueue.size();
246 // empiric studies have shown that this shouldn't be queued & dispatched
247 Q_EMIT data(outData);
248 // qDebug() << "(2) queue size " << dispatchQueue.size();
249 dispatch_data(ba: QByteArray{});
250 // qDebug() << "(3) queue size " << dispatchQueue.size();
251 dispatch_finished();
252 // qDebug() << "(4) queue size " << dispatchQueue.size();
253 deref();
254}
255
256/* --------------------------------------------------------------------- */
257
258void DataProtocol::mimetype(const QUrl &url)
259{
260 ref();
261 Q_EMIT mimeType(parseDataHeader(url, mimeOnly: true).mime_type);
262 Q_EMIT finished();
263 deref();
264}
265
266/* --------------------------------------------------------------------- */
267
268#if !defined(TESTKIO)
269#include "moc_dataprotocol_p.cpp"
270#endif
271

source code of kio/src/core/dataprotocol.cpp