| 1 | // Copyright (C) 2021 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qhttpmessagestreamparser_p.h" |
| 5 | |
| 6 | #include <QtCore/QtGlobal> |
| 7 | |
| 8 | QT_BEGIN_NAMESPACE |
| 9 | |
| 10 | using namespace Qt::StringLiterals; |
| 11 | |
/*!
 * \class QHttpMessageStreamParser
 * \brief Decodes a stream of headers and payloads encoded according to RFC 2616 (HTTP/1.1).
 *
 * Invalid sequences are reported through the error handler, but the parser is quite
 * permissive: it keeps parsing the stream instead of rejecting it.
 */
| 18 | |
| 19 | QHttpMessageStreamParser::QHttpMessageStreamParser( |
| 20 | std::function<void(const QByteArray &, const QByteArray &)> headerHandler, |
| 21 | std::function<void(const QByteArray &body)> bodyHandler, |
| 22 | std::function<void(QtMsgType error, QString msg)> errorHandler, Mode mode) |
| 23 | : m_headerHandler(std::move(headerHandler)), |
| 24 | m_bodyHandler(std::move(bodyHandler)), |
| 25 | m_errorHandler(std::move(errorHandler)), |
| 26 | m_mode(mode) |
| 27 | { |
| 28 | } |
| 29 | |
| 30 | bool QHttpMessageStreamParser::receiveEof() |
| 31 | { |
| 32 | if (m_state != State::PreHeader) { |
| 33 | errorMessage(error: QtWarningMsg, msg: u"Partial message at end of file"_s ); |
| 34 | return false; |
| 35 | } |
| 36 | return true; |
| 37 | } |
| 38 | |
| 39 | void QHttpMessageStreamParser::receiveData(QByteArray data) |
| 40 | { |
| 41 | const char lf = '\n'; |
| 42 | const char cr = '\r'; |
| 43 | const char colon = ':'; |
| 44 | const char space = ' '; |
| 45 | const char tab = '\t'; |
| 46 | qsizetype dataPos = 0; |
| 47 | bool didAdvance = false; |
| 48 | auto advance = [&]() { |
| 49 | data = data.mid(index: dataPos); |
| 50 | dataPos = 0; |
| 51 | didAdvance = true; |
| 52 | }; |
| 53 | while (dataPos < data.size()) { |
| 54 | switch (m_state) { |
| 55 | case State::PreHeader: |
| 56 | switch (data.at(i: dataPos)) { |
| 57 | case lf: |
| 58 | errorMessage(error: QtWarningMsg, |
| 59 | QStringLiteral("Unexpected newline without preceding carriage " |
| 60 | "return at start of headers" ) |
| 61 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 62 | m_state = State::AfterCrLf; |
| 63 | ++dataPos; |
| 64 | continue; |
| 65 | case cr: |
| 66 | m_state = State::AfterCr; |
| 67 | ++dataPos; |
| 68 | continue; |
| 69 | case tab: |
| 70 | case space: |
| 71 | errorMessage(error: QtWarningMsg, |
| 72 | msg: u"Unexpected space at start of headers, skipping"_s .arg( |
| 73 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 74 | while (dataPos < data.size()) { |
| 75 | char c = data.at(i: ++dataPos); |
| 76 | if (c != space && c != tab) { |
| 77 | advance(); |
| 78 | m_state = State::InHeaderField; |
| 79 | break; |
| 80 | } |
| 81 | } |
| 82 | break; |
| 83 | default: |
| 84 | m_state = State::InHeaderField; |
| 85 | break; |
| 86 | } |
| 87 | Q_ASSERT(m_currentHeaderField.isEmpty() && m_currentHeaderValue.isEmpty()); |
| 88 | break; |
| 89 | case State::InHeaderField: { |
| 90 | didAdvance = false; |
| 91 | while (!didAdvance) { |
| 92 | char c = data.at(i: dataPos); |
| 93 | switch (c) { |
| 94 | case lf: |
| 95 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
| 96 | errorMessage( |
| 97 | error: QtWarningMsg, |
| 98 | msg: u"Unexpected carriage return without newline in unterminated header %1"_s |
| 99 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 100 | |
| 101 | m_state = State::AfterCrLf; |
| 102 | advance(); |
| 103 | ++dataPos; |
| 104 | break; |
| 105 | case cr: |
| 106 | m_state = State::AfterCr; |
| 107 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
| 108 | errorMessage(error: QtWarningMsg, |
| 109 | msg: u"Newline before colon in header %1"_s .arg( |
| 110 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 111 | advance(); |
| 112 | ++dataPos; |
| 113 | break; |
| 114 | case colon: |
| 115 | m_currentHeaderField.append(a: data.mid(index: 0, len: dataPos)); |
| 116 | m_state = State::HeaderValueSpace; |
| 117 | ++dataPos; |
| 118 | advance(); |
| 119 | break; |
| 120 | case space: |
| 121 | case tab: |
| 122 | errorMessage(error: QtWarningMsg, msg: u"Space in header field name"_s ); |
| 123 | Q_FALLTHROUGH(); |
| 124 | default: |
| 125 | if (++dataPos == data.size()) { |
| 126 | m_currentHeaderField.append(a: data); |
| 127 | return; |
| 128 | } |
| 129 | break; |
| 130 | } |
| 131 | } |
| 132 | } break; |
| 133 | case State::HeaderValueSpace: |
| 134 | while (dataPos < data.size()) { |
| 135 | char c = data.at(i: dataPos); |
| 136 | if (c != space && c != tab) { |
| 137 | advance(); |
| 138 | m_state = State::InHeaderValue; |
| 139 | m_currentHeaderValue.clear(); |
| 140 | break; |
| 141 | } |
| 142 | ++dataPos; |
| 143 | } |
| 144 | break; |
| 145 | case State::InHeaderValue: { |
| 146 | didAdvance = false; |
| 147 | while (!didAdvance) { |
| 148 | char c = data.at(i: dataPos); |
| 149 | switch (c) { |
| 150 | case lf: |
| 151 | m_currentHeaderValue.append(a: data.mid(index: 0, len: dataPos)); |
| 152 | errorMessage(error: QtWarningMsg, |
| 153 | QStringLiteral("Unexpected newline without preceding " |
| 154 | "carriage return in header %1" ) |
| 155 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 156 | |
| 157 | m_state = State::AfterCrLf; |
| 158 | advance(); |
| 159 | ++dataPos; |
| 160 | break; |
| 161 | case cr: |
| 162 | m_currentHeaderValue.append(a: data.mid(index: 0, len: dataPos)); |
| 163 | m_state = State::AfterCr; |
| 164 | advance(); |
| 165 | ++dataPos; |
| 166 | break; |
| 167 | default: |
| 168 | if (++dataPos == data.size()) { |
| 169 | m_currentHeaderValue.append(a: data); |
| 170 | return; |
| 171 | } |
| 172 | break; |
| 173 | } |
| 174 | } |
| 175 | } break; |
| 176 | case State::AfterCr: { |
| 177 | char c = data.at(i: dataPos); |
| 178 | switch (c) { |
| 179 | case lf: |
| 180 | m_state = State::AfterCrLf; |
| 181 | ++dataPos; |
| 182 | break; |
| 183 | case cr: |
| 184 | errorMessage(error: QtWarningMsg, |
| 185 | QStringLiteral("Double carriage return encountred, interpreting it as " |
| 186 | "header end after header %1" ) |
| 187 | .arg(a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 188 | m_currentPacket.clear(); |
| 189 | m_currentPacketSize = 0; |
| 190 | ++dataPos; |
| 191 | advance(); |
| 192 | m_state = State::InBody; |
| 193 | callHasHeader(); |
| 194 | break; |
| 195 | case space: |
| 196 | case tab: |
| 197 | errorMessage( |
| 198 | error: QtWarningMsg, |
| 199 | msg: u"Unexpected carriage return without following newline in header %1"_s .arg( |
| 200 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 201 | m_state = State::InHeaderValue; |
| 202 | // m_currentHeaderValue.append(data.mid(0,dataPos)) to preserve the (non |
| 203 | // significant) newlines in header value |
| 204 | advance(); |
| 205 | break; |
| 206 | default: |
| 207 | errorMessage( |
| 208 | error: QtWarningMsg, |
| 209 | msg: u"Unexpected carriage return without following newline in header %1"_s .arg( |
| 210 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 211 | m_state = State::InHeaderField; |
| 212 | advance(); |
| 213 | callHasHeader(); |
| 214 | break; |
| 215 | } |
| 216 | } break; |
| 217 | case State::AfterCrLf: { |
| 218 | char c = data.at(i: dataPos); |
| 219 | switch (c) { |
| 220 | case lf: |
| 221 | errorMessage(error: QtWarningMsg, |
| 222 | msg: u"Newline without carriage return in header %1"_s .arg( |
| 223 | a: QString::fromUtf8(ba: m_currentHeaderField))); |
| 224 | // avoid seeing it as end of headers? |
| 225 | m_state = State::AfterCrLfCr; |
| 226 | break; |
| 227 | case cr: |
| 228 | m_state = State::AfterCrLfCr; |
| 229 | ++dataPos; |
| 230 | break; |
| 231 | case space: |
| 232 | case tab: |
| 233 | m_state = State::InHeaderValue; |
| 234 | // m_currentHeaderValue.append(data.mid(0,dataPos)) to preserve the (non |
| 235 | // significant) newlines in header value |
| 236 | advance(); |
| 237 | break; |
| 238 | default: |
| 239 | m_state = State::InHeaderField; |
| 240 | advance(); |
| 241 | callHasHeader(); |
| 242 | break; |
| 243 | } |
| 244 | } break; |
| 245 | case State::AfterCrLfCr: { |
| 246 | char c = data.at(i: dataPos); |
| 247 | switch (c) { |
| 248 | case lf: |
| 249 | m_currentPacket.clear(); |
| 250 | m_currentPacketSize = 0; |
| 251 | ++dataPos; |
| 252 | advance(); |
| 253 | m_state = State::InBody; |
| 254 | callHasHeader(); |
| 255 | break; |
| 256 | default: |
| 257 | errorMessage( |
| 258 | error: QtWarningMsg, |
| 259 | msg: u"crlfcr without final lf encountred, ignoring it (non clear terminator)"_s ); |
| 260 | m_state = State::InHeaderField; |
| 261 | advance(); |
| 262 | callHasHeader(); |
| 263 | break; |
| 264 | } |
| 265 | } break; |
| 266 | case State::InBody: { |
| 267 | if (m_contentSize == -1) { |
| 268 | errorMessage(error: QtWarningMsg, msg: u"missing valid Content-Length header"_s ); |
| 269 | m_state = State::PreHeader; |
| 270 | continue; |
| 271 | } |
| 272 | qint64 missing = m_contentSize - m_currentPacketSize; |
| 273 | if (missing > 0) { |
| 274 | dataPos = qMin(a: qsizetype(missing), b: data.size()); |
| 275 | m_currentPacketSize += dataPos; |
| 276 | if (m_mode == BUFFERED) |
| 277 | m_currentPacket.append(a: data.mid(index: 0, len: dataPos)); |
| 278 | advance(); |
| 279 | } |
| 280 | if (m_currentPacketSize >= m_contentSize) { |
| 281 | m_state = State::PreHeader; |
| 282 | callHasBody(); |
| 283 | } |
| 284 | } break; |
| 285 | } |
| 286 | } |
| 287 | if (m_state == State::InBody && (m_contentSize == -1 || m_contentSize == 0)) { |
| 288 | // nothing to read, but emit empty body... |
| 289 | m_state = State::PreHeader; |
| 290 | if (m_contentSize == -1) |
| 291 | errorMessage(error: QtWarningMsg, msg: u"missing valid Content-Length header"_s ); |
| 292 | callHasBody(); |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | void QHttpMessageStreamParser::() |
| 297 | { |
| 298 | static const QByteArray s_contentLengthFieldName = "Content-Length" ; |
| 299 | if (m_currentHeaderField.isEmpty() && m_currentHeaderValue.isEmpty()) |
| 300 | return; |
| 301 | QByteArray field = m_currentHeaderField; |
| 302 | QByteArray value = m_currentHeaderValue; |
| 303 | m_currentHeaderField.clear(); |
| 304 | m_currentHeaderValue.clear(); |
| 305 | if (s_contentLengthFieldName.compare(a: field, cs: Qt::CaseInsensitive) == 0) { |
| 306 | bool ok = false; |
| 307 | const int size = value.toInt(ok: &ok); |
| 308 | if (ok) { |
| 309 | m_contentSize = size; |
| 310 | } else { |
| 311 | errorMessage( |
| 312 | error: QtWarningMsg, |
| 313 | msg: u"Invalid %1: %2"_s .arg(args: QString::fromUtf8(ba: field), args: QString::fromUtf8(ba: value))); |
| 314 | } |
| 315 | } |
| 316 | if (m_headerHandler) |
| 317 | m_headerHandler(field, value); |
| 318 | } |
| 319 | |
| 320 | void QHttpMessageStreamParser::callHasBody() |
| 321 | { |
| 322 | // uses an empty QByteArray in callback for dry run |
| 323 | if (m_mode == UNBUFFERED) { |
| 324 | if (m_bodyHandler) |
| 325 | m_bodyHandler(QByteArray()); |
| 326 | return; |
| 327 | } |
| 328 | |
| 329 | QByteArray body = m_currentPacket; |
| 330 | m_currentPacket.clear(); |
| 331 | m_currentPacketSize = 0; |
| 332 | m_contentSize = -1; |
| 333 | |
| 334 | if (m_bodyHandler) |
| 335 | m_bodyHandler(body); |
| 336 | } |
| 337 | |
| 338 | void QHttpMessageStreamParser::errorMessage(QtMsgType error, QString msg) |
| 339 | { |
| 340 | if (m_errorHandler) |
| 341 | m_errorHandler(error, msg); |
| 342 | } |
| 343 | |
| 344 | QT_END_NAMESPACE |
| 345 | |