dataprotocol.cpp source code [kio/src/core/dataprotocol.cpp]

1	/*
2	Implementation of the data protocol (rfc 2397)
3
4	SPDX-FileCopyrightText: 2002, 2003 Leo Savernik <l.savernik@aon.at>
5
6	SPDX-License-Identifier: LGPL-2.0-only
7	*/
8
9	#include "dataprotocol_p.h"
10
11	#include "global.h"
12	#include "metadata.h"
13
14	#include <QByteArray>
15	#include <QStringDecoder>
16
17	using namespace KIO;
18
19	/ structure containing header information /
20	struct DataHeader {
21	QString mime_type; // MIME type of content (lowercase)
22	MetaData attributes; // attribute/value pairs (attribute lowercase,
23	// value unchanged)
24	bool is_base64; // true if data is base64 encoded
25	QByteArray url; // reference to decoded url
26	int data_offset; // zero-indexed position within url
27	// where the real data begins. May point beyond
28	// the end to indicate that there is no data
29	};
30
31	/ returns the position of the first occurrence of any of the given*
32	* characters c1 or comma (',') or semicolon (';') or buf.length()
33	* if none is contained.
34	*
35	* \a buf buffer where to look for c
36	* \a begin zero-indexed starting position
37	* \a c1 character to find or '\0' to ignore
38	*/
39	static int find(const QByteArray &buf, int begin, const char c1)
40	{
41	static const char comma = `','`;
42	static const char semicolon = `';'`;
43	int pos = begin;
44	int size = buf.length();
45	while (pos < size) {
46	const char ch = buf[pos];
47	if (ch == comma \|\| ch == semicolon \|\| (c1 != `'\0'` && ch == c1)) {
48	break;
49	}
50	pos++;
51	} /wend/
52	return pos;
53	}
54
55	/ extracts the string between the current position pos and the first*
56	* occurrence of either c1 or comma (',') or semicolon (';') exclusively
57	* and updates pos to point at the found delimiter or at the end of the
58	* buffer if neither character occurred.
59	* \a buf buffer where to look for
60	* \a pos zero-indexed position within buffer
61	* \a c1 character to find or '\0' to ignore
62	*/
63	static inline QString extract(const QByteArray &buf, int &pos, const char c1 = `'\0'`)
64	{
65	int oldpos = pos;
66	pos = find(buf, begin: oldpos, c1);
67	return QString::fromLatin1(ba: buf.mid(index: oldpos, len: pos - oldpos));
68	}
69
70	/ ignores all whitespaces*
71	* \a buf buffer to operate on
72	* \a pos position to shift to first non-whitespace character
73	* Upon return pos will either point to the first non-whitespace
74	* character or to the end of the buffer.
75	*/
76	static inline void ignoreWS(const QByteArray &buf, int &pos)
77	{
78	int size = buf.length();
79	while (pos < size && (buf[pos] == `' '` \|\| buf[pos] == `'\t'`)) {
80	++pos;
81	}
82	}
83
84	/ parses a quoted string as per rfc 822.*
85	*
86	* If trailing quote is missing, the whole rest of the buffer is returned.
87	* \a buf buffer to operate on
88	* \a pos position pointing to the leading quote
89	* Returns the extracted string. pos will be updated to point to the
90	* character following the trailing quote.
91	*/
92	static QString parseQuotedString(const QByteArray &buf, int &pos)
93	{
94	int size = buf.length();
95	QString res;
96	res.reserve(asize: size); // can't be larger than buf
97	pos++; // jump over leading quote
98	bool escaped = false; // if true means next character is literal
99	bool parsing = true; // true as long as end quote not found
100	while (parsing && pos < size) {
101	const QChar ch = QLatin1Char(buf[pos++]);
102	if (escaped) {
103	res += ch;
104	escaped = false;
105	} else {
106	switch (ch.unicode()) {
107	case `'"'`:
108	parsing = false;
109	break;
110	case `'\\'`:
111	escaped = true;
112	break;
113	default:
114	res += ch;
115	break;
116	} /end switch/
117	} /end if/
118	} /wend/
119	res.squeeze();
120	return res;
121	}
122
123	/ parses the header of a data url*
124	* \a url the data url
125	* \a mimeOnly if the only interesting information is the MIME type
126	* Returns DataHeader structure with the header information
127	*/
128	static DataHeader parseDataHeader(const QUrl &url, const bool mimeOnly)
129	{
130	DataHeader header_info;
131
132	// initialize header info members
133	header_info.mime_type = QStringLiteral("text/plain");
134	header_info.attributes.insert(QStringLiteral("charset"), QStringLiteral("us-ascii"));
135	header_info.is_base64 = false;
136
137	// decode url and save it
138	const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(pctEncoded: url.path(options: QUrl::FullyEncoded).toLatin1());
139	const int raw_url_len = raw_url.length();
140
141	header_info.data_offset = `0`;
142
143	// read MIME type
144	if (raw_url_len == `0`) {
145	return header_info;
146	}
147	const QString mime_type = extract(buf: raw_url, pos&: header_info.data_offset).trimmed();
148	if (!mime_type.isEmpty()) {
149	header_info.mime_type = mime_type;
150	}
151	if (mimeOnly) {
152	return header_info;
153	}
154
155	if (header_info.data_offset >= raw_url_len) {
156	return header_info;
157	}
158	// jump over delimiter token and return if data reached
159	if (raw_url[header_info.data_offset++] == `','`) {
160	return header_info;
161	}
162
163	// read all attributes and store them
164	bool data_begin_reached = false;
165	while (!data_begin_reached && header_info.data_offset < raw_url_len) {
166	// read attribute
167	const QString attribute = extract(buf: raw_url, pos&: header_info.data_offset, c1: `'='`).trimmed();
168	if (header_info.data_offset >= raw_url_len \|\| raw_url[header_info.data_offset] != `'='`) {
169	// no assignment, must be base64 option
170	if (attribute == QLatin1String("base64")) {
171	header_info.is_base64 = true;
172	}
173	} else {
174	header_info.data_offset++; // jump over '=' token
175
176	// read value
177	ignoreWS(buf: raw_url, pos&: header_info.data_offset);
178	if (header_info.data_offset >= raw_url_len) {
179	return header_info;
180	}
181
182	QString value;
183	if (raw_url[header_info.data_offset] == `'"'`) {
184	value = parseQuotedString(buf: raw_url, pos&: header_info.data_offset);
185	ignoreWS(buf: raw_url, pos&: header_info.data_offset);
186	} else {
187	value = extract(buf: raw_url, pos&: header_info.data_offset).trimmed();
188	}
189
190	// add attribute to map
191	header_info.attributes[attribute.toLower()] = value;
192
193	} /end if/
194	if (header_info.data_offset < raw_url_len && raw_url[header_info.data_offset] == `','`) {
195	data_begin_reached = true;
196	}
197	header_info.data_offset++; // jump over separator token
198	} /wend/
199
200	return header_info;
201	}
202
203	DataProtocol::DataProtocol()
204	{
205	}
206
// Compiler-generated destructor, defined out of line in this file.
DataProtocol::~DataProtocol() = default;
208
209	void DataProtocol::get(const QUrl &url)
210	{
211	ref();
212	// qDebug() << this;
213
214	const DataHeader hdr = parseDataHeader(url, mimeOnly: false);
215
216	const int size = hdr.url.length();
217	const int data_ofs = qMin(a: hdr.data_offset, b: size);
218	// FIXME: string is copied, would be nice if we could have a reference only
219	const QByteArray url_data = hdr.url.mid(index: data_ofs);
220	QByteArray outData;
221
222	if (hdr.is_base64) {
223	// base64 stuff is expected to contain the correct charset, so we just
224	// decode it and pass it to the receiver
225	outData = QByteArray::fromBase64(base64: url_data);
226	} else {
227	QStringDecoder codec(hdr.attributes[QStringLiteral("charset")].toLatin1().constData());
228	if (codec.isValid()) {
229	outData = QString(codec.decode(ba: url_data)).toUtf8();
230	} else {
231	outData = url_data;
232	} /end if/
233	} /end if/
234
235	// qDebug() << "emit mimeType@"<<this;
236	Q_EMIT mimeType(hdr.mime_type);
237	// qDebug() << "emit totalSize@"<<this;
238	Q_EMIT totalSize(outData.size());
239
240	// qDebug() << "emit setMetaData@"<<this;
241	setAllMetaData(hdr.attributes);
242
243	// qDebug() << "emit sendMetaData@"<<this;
244	sendMetaData();
245	// qDebug() << "(1) queue size " << dispatchQueue.size();
246	// empiric studies have shown that this shouldn't be queued & dispatched
247	Q_EMIT data(outData);
248	// qDebug() << "(2) queue size " << dispatchQueue.size();
249	dispatch_data(ba: QByteArray{});
250	// qDebug() << "(3) queue size " << dispatchQueue.size();
251	dispatch_finished();
252	// qDebug() << "(4) queue size " << dispatchQueue.size();
253	deref();
254	}
255
/* --------------------------------------------------------------------- */
257
258	void DataProtocol::mimetype(const QUrl &url)
259	{
260	ref();
261	Q_EMIT mimeType(parseDataHeader(url, mimeOnly: true).mime_type);
262	Q_EMIT finished();
263	deref();
264	}
265
/* --------------------------------------------------------------------- */
267
268	#if !defined(TESTKIO)
269	#include "moc_dataprotocol_p.cpp"
270	#endif
271

source code of kio/src/core/dataprotocol.cpp