1 | // Copyright (C) 2021 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qhttpmessagestreamparser_p.h" |
5 | |
6 | #include <QtCore/QtGlobal> |
7 | |
8 | QT_BEGIN_NAMESPACE |
9 | |
10 | using namespace Qt::StringLiterals; |
11 | |
12 | /*! |
13 | * \class QHttpMessageStreamParser |
14 | * \brief Decodes a stream of headers and payloads encoded according to rfc2616 (HTTP/1.1) |
15 | * |
16 | * It complains about invalid sequences, but is quite permissive in accepting them |
17 | */ |
18 | |
19 | QHttpMessageStreamParser::QHttpMessageStreamParser( |
20 | std::function<void(const QByteArray &, const QByteArray &)> headerHandler, |
21 | std::function<void(const QByteArray &body)> bodyHandler, |
22 | std::function<void(QtMsgType error, QString msg)> errorHandler, Mode mode) |
23 | : m_headerHandler(std::move(headerHandler)), |
24 | m_bodyHandler(std::move(bodyHandler)), |
25 | m_errorHandler(std::move(errorHandler)), |
26 | m_mode(mode) |
27 | { |
28 | } |
29 | |
30 | bool QHttpMessageStreamParser::receiveEof() |
31 | { |
32 | if (m_state != State::PreHeader) { |
33 | errorMessage(error: QtWarningMsg, msg: u"Partial message at end of file"_s ); |
34 | return false; |
35 | } |
36 | return true; |
37 | } |
38 | |
39 | void QHttpMessageStreamParser::receiveData(QByteArray data) |
40 | { |
41 | const char lf = '\n'; |
42 | const char cr = '\r'; |
43 | const char colon = ':'; |
44 | const char space = ' '; |
45 | const char tab = '\t'; |
46 | qsizetype dataPos = 0; |
47 | bool didAdvance = false; |
48 | auto advance = [&]() { |
49 | data = data.mid(index: dataPos); |
50 | dataPos = 0; |
51 | didAdvance = true; |
52 | }; |
53 | while (dataPos < data.size()) { |
54 | switch (m_state) { |
55 | case State::PreHeader: |
56 | switch (data.at(i: dataPos)) { |
57 | case lf: |
58 | errorMessage(error: QtWarningMsg, |
59 | QStringLiteral("Unexpected newline without preceding carriage " |
60 | "return at start of headers" ) |
61 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
62 | m_state = State::AfterCrLf; |
63 | ++dataPos; |
64 | continue; |
65 | case cr: |
66 | m_state = State::AfterCr; |
67 | ++dataPos; |
68 | continue; |
69 | case tab: |
70 | case space: |
71 | errorMessage(error: QtWarningMsg, |
72 | msg: u"Unexpected space at start of headers, skipping"_s .arg( |
73 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
74 | while (dataPos < data.size()) { |
75 | char c = data.at(i: ++dataPos); |
76 | if (c != space && c != tab) { |
77 | advance(); |
78 | m_state = State::InHeaderField; |
79 | break; |
80 | } |
81 | } |
82 | break; |
83 | default: |
84 | m_state = State::InHeaderField; |
85 | break; |
86 | } |
87 | Q_ASSERT(m_currentHeaderField.isEmpty() && m_currentHeaderValue.isEmpty()); |
88 | break; |
89 | case State::InHeaderField: { |
90 | didAdvance = false; |
91 | while (!didAdvance) { |
92 | char c = data.at(i: dataPos); |
93 | switch (c) { |
94 | case lf: |
95 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
96 | errorMessage( |
97 | error: QtWarningMsg, |
98 | msg: u"Unexpected carriage return without newline in unterminated header %1"_s |
99 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
100 | |
101 | m_state = State::AfterCrLf; |
102 | advance(); |
103 | ++dataPos; |
104 | break; |
105 | case cr: |
106 | m_state = State::AfterCr; |
107 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
108 | errorMessage(error: QtWarningMsg, |
109 | msg: u"Newline before colon in header %1"_s .arg( |
110 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
111 | advance(); |
112 | ++dataPos; |
113 | break; |
114 | case colon: |
115 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
116 | m_state = State::HeaderValueSpace; |
117 | ++dataPos; |
118 | advance(); |
119 | break; |
120 | case space: |
121 | case tab: |
122 | errorMessage(error: QtWarningMsg, msg: u"Space in header field name"_s ); |
123 | Q_FALLTHROUGH(); |
124 | default: |
125 | if (++dataPos == data.size()) { |
126 | m_currentHeaderField.append(a: data); |
127 | return; |
128 | } |
129 | break; |
130 | } |
131 | } |
132 | } break; |
133 | case State::HeaderValueSpace: |
134 | while (dataPos < data.size()) { |
135 | char c = data.at(i: dataPos); |
136 | if (c != space && c != tab) { |
137 | advance(); |
138 | m_state = State::InHeaderValue; |
139 | m_currentHeaderValue.clear(); |
140 | break; |
141 | } |
142 | ++dataPos; |
143 | } |
144 | break; |
145 | case State::InHeaderValue: { |
146 | didAdvance = false; |
147 | while (!didAdvance) { |
148 | char c = data.at(i: dataPos); |
149 | switch (c) { |
150 | case lf: |
151 | m_currentHeaderValue.append(a: data.mid(index: 0, len: dataPos)); |
152 | errorMessage(error: QtWarningMsg, |
153 | QStringLiteral("Unexpected newline without preceding " |
154 | "carriage return in header %1" ) |
155 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
156 | |
157 | m_state = State::AfterCrLf; |
158 | advance(); |
159 | ++dataPos; |
160 | break; |
161 | case cr: |
162 | m_currentHeaderValue.append(a: data.mid(index: 0, len: dataPos)); |
163 | m_state = State::AfterCr; |
164 | advance(); |
165 | ++dataPos; |
166 | break; |
167 | default: |
168 | if (++dataPos == data.size()) { |
169 | m_currentHeaderValue.append(a: data); |
170 | return; |
171 | } |
172 | break; |
173 | } |
174 | } |
175 | } break; |
176 | case State::AfterCr: { |
177 | char c = data.at(i: dataPos); |
178 | switch (c) { |
179 | case lf: |
180 | m_state = State::AfterCrLf; |
181 | ++dataPos; |
182 | break; |
183 | case cr: |
184 | errorMessage(error: QtWarningMsg, |
185 | QStringLiteral("Double carriage return encountred, interpreting it as " |
186 | "header end after header %1" ) |
187 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
188 | m_currentPacket.clear(); |
189 | m_currentPacketSize = 0; |
190 | ++dataPos; |
191 | advance(); |
192 | m_state = State::InBody; |
193 | callHasHeader(); |
194 | break; |
195 | case space: |
196 | case tab: |
197 | errorMessage( |
198 | error: QtWarningMsg, |
199 | msg: u"Unexpected carriage return without following newline in header %1"_s .arg( |
200 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
201 | m_state = State::InHeaderValue; |
202 | // m_currentHeaderValue.append(data.mid(0,dataPos)) to preserve the (non |
203 | // significant) newlines in header value |
204 | advance(); |
205 | break; |
206 | default: |
207 | errorMessage( |
208 | error: QtWarningMsg, |
209 | msg: u"Unexpected carriage return without following newline in header %1"_s .arg( |
210 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
211 | m_state = State::InHeaderField; |
212 | advance(); |
213 | callHasHeader(); |
214 | break; |
215 | } |
216 | } break; |
217 | case State::AfterCrLf: { |
218 | char c = data.at(i: dataPos); |
219 | switch (c) { |
220 | case lf: |
221 | errorMessage(error: QtWarningMsg, |
222 | msg: u"Newline without carriage return in header %1"_s .arg( |
223 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
224 | // avoid seeing it as end of headers? |
225 | m_state = State::AfterCrLfCr; |
226 | break; |
227 | case cr: |
228 | m_state = State::AfterCrLfCr; |
229 | ++dataPos; |
230 | break; |
231 | case space: |
232 | case tab: |
233 | m_state = State::InHeaderValue; |
234 | // m_currentHeaderValue.append(data.mid(0,dataPos)) to preserve the (non |
235 | // significant) newlines in header value |
236 | advance(); |
237 | break; |
238 | default: |
239 | m_state = State::InHeaderField; |
240 | advance(); |
241 | callHasHeader(); |
242 | break; |
243 | } |
244 | } break; |
245 | case State::AfterCrLfCr: { |
246 | char c = data.at(i: dataPos); |
247 | switch (c) { |
248 | case lf: |
249 | m_currentPacket.clear(); |
250 | m_currentPacketSize = 0; |
251 | ++dataPos; |
252 | advance(); |
253 | m_state = State::InBody; |
254 | callHasHeader(); |
255 | break; |
256 | default: |
257 | errorMessage( |
258 | error: QtWarningMsg, |
259 | msg: u"crlfcr without final lf encountred, ignoring it (non clear terminator)"_s ); |
260 | m_state = State::InHeaderField; |
261 | advance(); |
262 | callHasHeader(); |
263 | break; |
264 | } |
265 | } break; |
266 | case State::InBody: { |
267 | if (m_contentSize == -1) { |
268 | errorMessage(error: QtWarningMsg, msg: u"missing valid Content-Length header"_s ); |
269 | m_state = State::PreHeader; |
270 | continue; |
271 | } |
272 | qint64 missing = m_contentSize - m_currentPacketSize; |
273 | if (missing > 0) { |
274 | dataPos = qMin(a: qsizetype(missing), b: data.size()); |
275 | m_currentPacketSize += dataPos; |
276 | if (m_mode == BUFFERED) |
277 | m_currentPacket.append(a: data.mid(index: 0, len: dataPos)); |
278 | advance(); |
279 | } |
280 | if (m_currentPacketSize >= m_contentSize) { |
281 | m_state = State::PreHeader; |
282 | callHasBody(); |
283 | } |
284 | } break; |
285 | } |
286 | } |
287 | if (m_state == State::InBody && (m_contentSize == -1 || m_contentSize == 0)) { |
288 | // nothing to read, but emit empty body... |
289 | m_state = State::PreHeader; |
290 | if (m_contentSize == -1) |
291 | errorMessage(error: QtWarningMsg, msg: u"missing valid Content-Length header"_s ); |
292 | callHasBody(); |
293 | } |
294 | } |
295 | |
296 | void QHttpMessageStreamParser::() |
297 | { |
298 | static const QByteArray s_contentLengthFieldName = "Content-Length" ; |
299 | if (m_currentHeaderField.isEmpty() && m_currentHeaderValue.isEmpty()) |
300 | return; |
301 | QByteArray field = m_currentHeaderField; |
302 | QByteArray value = m_currentHeaderValue; |
303 | m_currentHeaderField.clear(); |
304 | m_currentHeaderValue.clear(); |
305 | if (s_contentLengthFieldName.compare(a: field, cs: Qt::CaseInsensitive) == 0) { |
306 | bool ok = false; |
307 | const int size = value.toInt(ok: &ok); |
308 | if (ok) { |
309 | m_contentSize = size; |
310 | } else { |
311 | errorMessage( |
312 | error: QtWarningMsg, |
313 | msg: u"Invalid %1: %2"_s .arg(args: QString::fromUtf8(ba: field), args: QString::fromUtf8(ba: value))); |
314 | } |
315 | } |
316 | if (m_headerHandler) |
317 | m_headerHandler(field, value); |
318 | } |
319 | |
320 | void QHttpMessageStreamParser::callHasBody() |
321 | { |
322 | // uses an empty QByteArray in callback for dry run |
323 | if (m_mode == UNBUFFERED) { |
324 | if (m_bodyHandler) |
325 | m_bodyHandler(QByteArray()); |
326 | return; |
327 | } |
328 | |
329 | QByteArray body = m_currentPacket; |
330 | m_currentPacket.clear(); |
331 | m_currentPacketSize = 0; |
332 | m_contentSize = -1; |
333 | |
334 | if (m_bodyHandler) |
335 | m_bodyHandler(body); |
336 | } |
337 | |
338 | void QHttpMessageStreamParser::errorMessage(QtMsgType error, QString msg) |
339 | { |
340 | if (m_errorHandler) |
341 | m_errorHandler(error, msg); |
342 | } |
343 | |
344 | QT_END_NAMESPACE |
345 | |