| 1 | // Copyright (C) 2017 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qhsts_p.h" |
| 5 | |
| 6 | #include "qhttpheaders.h" |
| 7 | |
| 8 | #include "QtCore/private/qipaddress_p.h" |
| 9 | #include "QtCore/qlist.h" |
| 10 | |
| 11 | #if QT_CONFIG(settings) |
| 12 | #include "qhstsstore_p.h" |
| 13 | #endif // QT_CONFIG(settings) |
| 14 | |
| 15 | QT_BEGIN_NAMESPACE |
| 16 | |
| 17 | static bool is_valid_domain_name(const QString &host) |
| 18 | { |
| 19 | if (!host.size()) |
| 20 | return false; |
| 21 | |
| 22 | // RFC6797 8.1.1 |
| 23 | // If the substring matching the host production from the Request-URI |
| 24 | // (of the message to which the host responded) syntactically matches |
| 25 | //the IP-literal or IPv4address productions from Section 3.2.2 of |
| 26 | //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host. |
| 27 | using namespace QIPAddressUtils; |
| 28 | |
| 29 | IPv4Address ipv4Addr = {}; |
| 30 | if (parseIp4(address&: ipv4Addr, begin: host.constBegin(), end: host.constEnd())) |
| 31 | return false; |
| 32 | |
| 33 | IPv6Address ipv6Addr = {}; |
| 34 | // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6 |
| 35 | // address successfully. |
| 36 | if (!parseIp6(address&: ipv6Addr, begin: host.constBegin(), end: host.constEnd())) |
| 37 | return false; |
| 38 | |
| 39 | // TODO: for now we do not test IPvFuture address, it must be addressed |
| 40 | // by introducing parseIpFuture (actually, there is an implementation |
| 41 | // in QUrl that can be adopted/modified/moved to QIPAddressUtils). |
| 42 | return true; |
| 43 | } |
| 44 | |
| 45 | void QHstsCache::(const QHttpHeaders &, |
| 46 | const QUrl &url) |
| 47 | { |
| 48 | if (!url.isValid()) |
| 49 | return; |
| 50 | |
| 51 | QHstsHeaderParser parser; |
| 52 | if (parser.parse(headers)) { |
| 53 | updateKnownHost(hostName: url.host(), expires: parser.expirationDate(), includeSubDomains: parser.includeSubDomains()); |
| 54 | #if QT_CONFIG(settings) |
| 55 | if (hstsStore) |
| 56 | hstsStore->synchronize(); |
| 57 | #endif // QT_CONFIG(settings) |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | void QHstsCache::updateFromPolicies(const QList<QHstsPolicy> &policies) |
| 62 | { |
| 63 | for (const auto &policy : policies) |
| 64 | updateKnownHost(hostName: policy.host(), expires: policy.expiry(), includeSubDomains: policy.includesSubDomains()); |
| 65 | |
| 66 | #if QT_CONFIG(settings) |
| 67 | if (hstsStore && policies.size()) { |
| 68 | // These policies are coming either from store or from QNAM's setter |
| 69 | // function. As a result we can notice expired or new policies, time |
| 70 | // to sync ... |
| 71 | hstsStore->synchronize(); |
| 72 | } |
| 73 | #endif // QT_CONFIG(settings) |
| 74 | } |
| 75 | |
| 76 | void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires, |
| 77 | bool includeSubDomains) |
| 78 | { |
| 79 | if (!url.isValid()) |
| 80 | return; |
| 81 | |
| 82 | updateKnownHost(hostName: url.host(), expires, includeSubDomains); |
| 83 | #if QT_CONFIG(settings) |
| 84 | if (hstsStore) |
| 85 | hstsStore->synchronize(); |
| 86 | #endif // QT_CONFIG(settings) |
| 87 | } |
| 88 | |
| 89 | void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires, |
| 90 | bool includeSubDomains) |
| 91 | { |
| 92 | if (!is_valid_domain_name(host)) |
| 93 | return; |
| 94 | |
| 95 | // HSTS is a per-host policy, regardless of protocol, port or any of the other |
| 96 | // details in an URL; so we only want the host part. QUrl::host handles |
| 97 | // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10). |
| 98 | const HostName hostName(host); |
| 99 | const auto pos = knownHosts.find(x: hostName); |
| 100 | QHstsPolicy::PolicyFlags flags; |
| 101 | if (includeSubDomains) |
| 102 | flags = QHstsPolicy::IncludeSubDomains; |
| 103 | |
| 104 | const QHstsPolicy newPolicy(expires, flags, hostName.name); |
| 105 | if (pos == knownHosts.end()) { |
| 106 | // A new, previously unknown host. |
| 107 | if (newPolicy.isExpired()) { |
| 108 | // Nothing to do at all - we did not know this host previously, |
| 109 | // we do not have to - since its policy expired. |
| 110 | return; |
| 111 | } |
| 112 | |
| 113 | knownHosts.insert(x: {hostName, newPolicy}); |
| 114 | #if QT_CONFIG(settings) |
| 115 | if (hstsStore) |
| 116 | hstsStore->addToObserved(policy: newPolicy); |
| 117 | #endif // QT_CONFIG(settings) |
| 118 | return; |
| 119 | } |
| 120 | |
| 121 | if (newPolicy.isExpired()) |
| 122 | knownHosts.erase(position: pos); |
| 123 | else if (pos->second != newPolicy) |
| 124 | pos->second = newPolicy; |
| 125 | else |
| 126 | return; |
| 127 | |
| 128 | #if QT_CONFIG(settings) |
| 129 | if (hstsStore) |
| 130 | hstsStore->addToObserved(policy: newPolicy); |
| 131 | #endif // QT_CONFIG(settings) |
| 132 | } |
| 133 | |
| 134 | bool QHstsCache::isKnownHost(const QUrl &url) const |
| 135 | { |
| 136 | if (!url.isValid() || !is_valid_domain_name(host: url.host())) |
| 137 | return false; |
| 138 | |
| 139 | /* |
| 140 | RFC6797, 8.2. Known HSTS Host Domain Name Matching |
| 141 | |
| 142 | * Superdomain Match |
| 143 | If a label-for-label match between an entire Known HSTS Host's |
| 144 | domain name and a right-hand portion of the given domain name |
| 145 | is found, then this Known HSTS Host's domain name is a |
| 146 | superdomain match for the given domain name. There could be |
| 147 | multiple superdomain matches for a given domain name. |
| 148 | * Congruent Match |
| 149 | If a label-for-label match between a Known HSTS Host's domain |
| 150 | name and the given domain name is found -- i.e., there are no |
| 151 | further labels to compare -- then the given domain name |
| 152 | congruently matches this Known HSTS Host. |
| 153 | |
| 154 | We start from the congruent match, and then chop labels and dots and |
| 155 | proceed with superdomain match. While RFC6797 recommends to start from |
| 156 | superdomain, the result is the same - some valid policy will make a host |
| 157 | known. |
| 158 | */ |
| 159 | |
| 160 | bool superDomainMatch = false; |
| 161 | const QString hostNameAsString(url.host()); |
| 162 | HostName nameToTest(QStringView{hostNameAsString}); |
| 163 | while (nameToTest.fragment.size()) { |
| 164 | auto const pos = knownHosts.find(x: nameToTest); |
| 165 | if (pos != knownHosts.end()) { |
| 166 | if (pos->second.isExpired()) { |
| 167 | knownHosts.erase(position: pos); |
| 168 | #if QT_CONFIG(settings) |
| 169 | if (hstsStore) { |
| 170 | // Inform our store that this policy has expired. |
| 171 | hstsStore->addToObserved(policy: pos->second); |
| 172 | } |
| 173 | #endif // QT_CONFIG(settings) |
| 174 | } else if (!superDomainMatch || pos->second.includesSubDomains()) { |
| 175 | return true; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | const qsizetype dot = nameToTest.fragment.indexOf(c: u'.'); |
| 180 | if (dot == -1) |
| 181 | break; |
| 182 | |
| 183 | nameToTest.fragment = nameToTest.fragment.mid(pos: dot + 1); |
| 184 | superDomainMatch = true; |
| 185 | } |
| 186 | |
| 187 | return false; |
| 188 | } |
| 189 | |
| 190 | void QHstsCache::clear() |
| 191 | { |
| 192 | knownHosts.clear(); |
| 193 | } |
| 194 | |
| 195 | QList<QHstsPolicy> QHstsCache::policies() const |
| 196 | { |
| 197 | QList<QHstsPolicy> values; |
| 198 | values.reserve(asize: int(knownHosts.size())); |
| 199 | for (const auto &host : knownHosts) |
| 200 | values << host.second; |
| 201 | return values; |
| 202 | } |
| 203 | |
| 204 | #if QT_CONFIG(settings) |
| 205 | void QHstsCache::setStore(QHstsStore *store) |
| 206 | { |
| 207 | // Caller retains ownership of store, which must outlive this cache. |
| 208 | if (store != hstsStore) { |
| 209 | hstsStore = store; |
| 210 | |
| 211 | if (!hstsStore) |
| 212 | return; |
| 213 | |
| 214 | // First we augment our store with the policies we already know about |
| 215 | // (and thus the cached policy takes priority over whatever policy we |
| 216 | // had in the store for the same host, if any). |
| 217 | if (knownHosts.size()) { |
| 218 | const QList<QHstsPolicy> observed(policies()); |
| 219 | for (const auto &policy : observed) |
| 220 | hstsStore->addToObserved(policy); |
| 221 | hstsStore->synchronize(); |
| 222 | } |
| 223 | |
| 224 | // Now we update the cache with anything we have not observed yet, but |
| 225 | // the store knows about (well, it can happen we synchronize again as a |
| 226 | // result if some policies managed to expire or if we add a new one |
| 227 | // from the store to cache): |
| 228 | const QList<QHstsPolicy> restored(store->readPolicies()); |
| 229 | updateFromPolicies(policies: restored); |
| 230 | } |
| 231 | } |
| 232 | #endif // QT_CONFIG(settings) |
| 233 | |
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else is found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is found
// or we have no more bytes in the header.
| 241 | |
| 242 | // The following isXXX functions are based on RFC2616, 2.2 Basic Rules. |
| 243 | |
// RFC 2616, 2.2:
// CHAR = <any US-ASCII character (octets 0 - 127)>
static bool isCHAR(int c)
{
    if (c < 0)
        return false;
    return c < 128;
}
| 249 | |
// RFC 2616, 2.2:
// CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
static bool isCTL(int c)
{
    if (c == 127) // DEL
        return true;
    return c >= 0 && c < 32;
}
| 256 | |
| 257 | |
// RFC 2616, 2.2:
//
// LWS            = [CRLF] 1*( SP | HT )
//
// CRLF           = CR LF
// CR             = <US-ASCII CR, carriage return (13)>
// LF             = <US-ASCII LF, linefeed (10)>
// SP             = <US-ASCII SP, space (32)>
// HT             = <US-ASCII HT, horizontal-tab (9)>
//
// Only SP and HT are recognized here: by the time a header is parsed, CRLF
// sequences have already been replaced with spaces.
static bool isLWS(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
| 272 | |
| 273 | static bool isTEXT(char c) |
| 274 | { |
| 275 | // TEXT = <any OCTET except CTLs, |
| 276 | // but including LWS> |
| 277 | return !isCTL(c) || isLWS(c); |
| 278 | } |
| 279 | |
| 280 | static bool isSeparator(char c) |
| 281 | { |
| 282 | // separators = "(" | ")" | "<" | ">" | "@" |
| 283 | // | "," | ";" | ":" | "\" | <"> |
| 284 | // | "/" | "[" | "]" | "?" | "=" |
| 285 | // | "{" | "}" | SP | HT |
| 286 | static const char separators[] = "()<>@,;:\\\"/[]?={}" ; |
| 287 | static const char *end = separators + sizeof separators - 1; |
| 288 | return isLWS(c) || std::find(first: separators, last: end, val: c) != end; |
| 289 | } |
| 290 | |
| 291 | static QByteArrayView unescapeMaxAge(QByteArrayView value) |
| 292 | { |
| 293 | if (value.size() < 2 || value[0] != '"') |
| 294 | return value; |
| 295 | |
| 296 | Q_ASSERT(value[value.size() - 1] == '"'); |
| 297 | return value.mid(pos: 1, n: value.size() - 2); |
| 298 | } |
| 299 | |
| 300 | static bool isTOKEN(char c) |
| 301 | { |
| 302 | // token = 1*<any CHAR except CTLs or separators> |
| 303 | return isCHAR(c) && !isCTL(c) && !isSeparator(c); |
| 304 | } |
| 305 | |
| 306 | /* |
| 307 | |
| 308 | RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field. |
| 309 | Syntax: |
| 310 | |
    Strict-Transport-Security = "Strict-Transport-Security" ":"
| 312 | [ directive ] *( ";" [ directive ] ) |
| 313 | |
| 314 | directive = directive-name [ "=" directive-value ] |
| 315 | directive-name = token |
| 316 | directive-value = token | quoted-string |
| 317 | |
| 318 | RFC 2616, 2.2 Basic Rules. |
| 319 | |
| 320 | token = 1*<any CHAR except CTLs or separators> |
| 321 | quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
| 322 | |
| 323 | |
| 324 | qdtext = <any TEXT except <">> |
| 325 | quoted-pair = "\" CHAR |
| 326 | |
| 327 | */ |
| 328 | |
| 329 | bool QHstsHeaderParser::(const QHttpHeaders &) |
| 330 | { |
| 331 | for (const auto &value : headers.values( |
| 332 | name: QHttpHeaders::WellKnownHeader::StrictTransportSecurity)) { |
| 333 | header = value; |
| 334 | // RFC6797, 8.1: |
| 335 | // |
| 336 | // The UA MUST ignore any STS header fields not conforming to the |
| 337 | // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP |
| 338 | // Response Header Field"). |
| 339 | // |
| 340 | // If a UA receives more than one STS header field in an HTTP |
| 341 | // response message over secure transport, then the UA MUST process |
| 342 | // only the first such header field. |
| 343 | // |
| 344 | // We read this as: ignore all invalid headers and take the first valid: |
| 345 | if (parseSTSHeader() && maxAgeFound) { |
| 346 | expiry = QDateTime::currentDateTimeUtc().addSecs(secs: maxAge); |
| 347 | return true; |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | // In case it was set by a syntactically correct header (but without |
| 352 | // REQUIRED max-age directive): |
| 353 | subDomainsFound = false; |
| 354 | |
| 355 | return false; |
| 356 | } |
| 357 | |
| 358 | bool QHstsHeaderParser::() |
| 359 | { |
| 360 | expiry = QDateTime(); |
| 361 | maxAgeFound = false; |
| 362 | subDomainsFound = false; |
| 363 | maxAge = 0; |
| 364 | tokenPos = 0; |
| 365 | token.clear(); |
| 366 | |
| 367 | while (tokenPos < header.size()) { |
| 368 | if (!parseDirective()) |
| 369 | return false; |
| 370 | |
| 371 | if (token.size() && token != ";" ) { |
| 372 | // After a directive we can only have a ";" or no more tokens. |
| 373 | // Invalid syntax. |
| 374 | return false; |
| 375 | } |
| 376 | } |
| 377 | |
| 378 | return true; |
| 379 | } |
| 380 | |
| 381 | bool QHstsHeaderParser::() |
| 382 | { |
| 383 | // RFC 6797, 6.1: |
| 384 | // |
| 385 | // directive = directive-name [ "=" directive-value ] |
| 386 | // directive-name = token |
| 387 | // directive-value = token | quoted-string |
| 388 | |
| 389 | |
| 390 | // RFC 2616, 2.2: |
| 391 | // |
| 392 | // token = 1*<any CHAR except CTLs or separators> |
| 393 | |
| 394 | if (!nextToken()) |
| 395 | return false; |
| 396 | |
| 397 | if (!token.size()) // No more data, but no error. |
| 398 | return true; |
| 399 | |
| 400 | if (token == ";" ) // That's a weird grammar, but that's what it is. |
| 401 | return true; |
| 402 | |
| 403 | if (!isTOKEN(c: token.at(i: 0))) // Not a valid directive-name. |
| 404 | return false; |
| 405 | |
| 406 | const QByteArray directiveName = token; |
| 407 | // 2. Try to read "=" or ";". |
| 408 | if (!nextToken()) |
| 409 | return false; |
| 410 | |
| 411 | QByteArray directiveValue; |
| 412 | if (token == ";" ) // No directive-value |
| 413 | return processDirective(name: directiveName, value: directiveValue); |
| 414 | |
| 415 | if (token == "=" ) { |
| 416 | // We expect a directive-value now: |
| 417 | if (!nextToken() || !token.size()) |
| 418 | return false; |
| 419 | directiveValue = token; |
| 420 | } else if (token.size()) { |
| 421 | // Invalid syntax: |
| 422 | return false; |
| 423 | } |
| 424 | |
| 425 | if (!processDirective(name: directiveName, value: directiveValue)) |
| 426 | return false; |
| 427 | |
| 428 | // Read either ";", or 'end of header', or some invalid token. |
| 429 | return nextToken(); |
| 430 | } |
| 431 | |
| 432 | bool QHstsHeaderParser::(const QByteArray &name, const QByteArray &value) |
| 433 | { |
| 434 | Q_ASSERT(name.size()); |
| 435 | // RFC6797 6.1/3 Directive names are case-insensitive |
| 436 | if (name.compare(a: "max-age" , cs: Qt::CaseInsensitive) == 0) { |
| 437 | // RFC 6797, 6.1.1 |
| 438 | // The syntax of the max-age directive's REQUIRED value (after |
| 439 | // quoted-string unescaping, if necessary) is defined as: |
| 440 | // |
| 441 | // max-age-value = delta-seconds |
| 442 | if (maxAgeFound) { |
| 443 | // RFC 6797, 6.1/2: |
| 444 | // All directives MUST appear only once in an STS header field. |
| 445 | return false; |
| 446 | } |
| 447 | |
| 448 | const QByteArrayView unescapedValue = unescapeMaxAge(value); |
| 449 | if (!unescapedValue.size()) |
| 450 | return false; |
| 451 | |
| 452 | bool ok = false; |
| 453 | const qint64 age = unescapedValue.toLongLong(ok: &ok); |
| 454 | if (!ok || age < 0) |
| 455 | return false; |
| 456 | |
| 457 | maxAge = age; |
| 458 | maxAgeFound = true; |
| 459 | } else if (name.compare(a: "includesubdomains" , cs: Qt::CaseInsensitive) == 0) { |
| 460 | // RFC 6797, 6.1.2. The includeSubDomains Directive. |
| 461 | // The OPTIONAL "includeSubDomains" directive is a valueless directive. |
| 462 | |
| 463 | if (subDomainsFound) { |
| 464 | // RFC 6797, 6.1/2: |
| 465 | // All directives MUST appear only once in an STS header field. |
| 466 | return false; |
| 467 | } |
| 468 | |
| 469 | subDomainsFound = true; |
| 470 | } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5) |
| 471 | |
| 472 | return true; |
| 473 | } |
| 474 | |
| 475 | bool QHstsHeaderParser::() |
| 476 | { |
| 477 | // Returns true if we found a valid token or we have no more data (token is |
| 478 | // empty then). |
| 479 | |
| 480 | token.clear(); |
| 481 | |
| 482 | // Fortunately enough, by this point qhttpnetworkreply already got rid of |
| 483 | // [CRLF] parts, but we can have 1*(SP|HT) yet. |
| 484 | while (tokenPos < header.size() && isLWS(c: header.at(i: tokenPos))) |
| 485 | ++tokenPos; |
| 486 | |
| 487 | if (tokenPos == header.size()) |
| 488 | return true; |
| 489 | |
| 490 | const char ch = header.at(i: tokenPos); |
| 491 | if (ch == ';' || ch == '=') { |
| 492 | token.append(c: ch); |
| 493 | ++tokenPos; |
| 494 | return true; |
| 495 | } |
| 496 | |
| 497 | // RFC 2616, 2.2. |
| 498 | // |
| 499 | // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
| 500 | // qdtext = <any TEXT except <">> |
| 501 | if (ch == '"') { |
| 502 | int last = tokenPos + 1; |
| 503 | while (last < header.size()) { |
| 504 | if (header.at(i: last) == '"') { |
| 505 | // The end of a quoted-string. |
| 506 | break; |
| 507 | } else if (header.at(i: last) == '\\') { |
| 508 | // quoted-pair = "\" CHAR |
| 509 | if (last + 1 < header.size() && isCHAR(c: header.at(i: last + 1))) |
| 510 | last += 2; |
| 511 | else |
| 512 | return false; |
| 513 | } else { |
| 514 | if (!isTEXT(c: header.at(i: last))) |
| 515 | return false; |
| 516 | ++last; |
| 517 | } |
| 518 | } |
| 519 | |
| 520 | if (last >= header.size()) // no closing '"': |
| 521 | return false; |
| 522 | |
| 523 | token = header.mid(index: tokenPos, len: last - tokenPos + 1); |
| 524 | tokenPos = last + 1; |
| 525 | return true; |
| 526 | } |
| 527 | |
| 528 | // RFC 2616, 2.2: |
| 529 | // |
| 530 | // token = 1*<any CHAR except CTLs or separators> |
| 531 | if (!isTOKEN(c: ch)) |
| 532 | return false; |
| 533 | |
| 534 | int last = tokenPos + 1; |
| 535 | while (last < header.size() && isTOKEN(c: header.at(i: last))) |
| 536 | ++last; |
| 537 | |
| 538 | token = header.mid(index: tokenPos, len: last - tokenPos); |
| 539 | tokenPos = last; |
| 540 | |
| 541 | return true; |
| 542 | } |
| 543 | |
| 544 | QT_END_NAMESPACE |
| 545 | |