1 | // Copyright (C) 2017 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qhsts_p.h" |
5 | |
6 | #include "qhttpheaders.h" |
7 | |
8 | #include "QtCore/private/qipaddress_p.h" |
9 | #include "QtCore/qlist.h" |
10 | |
11 | #if QT_CONFIG(settings) |
12 | #include "qhstsstore_p.h" |
13 | #endif // QT_CONFIG(settings) |
14 | |
15 | QT_BEGIN_NAMESPACE |
16 | |
17 | static bool is_valid_domain_name(const QString &host) |
18 | { |
19 | if (!host.size()) |
20 | return false; |
21 | |
22 | // RFC6797 8.1.1 |
23 | // If the substring matching the host production from the Request-URI |
24 | // (of the message to which the host responded) syntactically matches |
25 | //the IP-literal or IPv4address productions from Section 3.2.2 of |
26 | //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host. |
27 | using namespace QIPAddressUtils; |
28 | |
29 | IPv4Address ipv4Addr = {}; |
30 | if (parseIp4(address&: ipv4Addr, begin: host.constBegin(), end: host.constEnd())) |
31 | return false; |
32 | |
33 | IPv6Address ipv6Addr = {}; |
34 | // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6 |
35 | // address successfully. |
36 | if (!parseIp6(address&: ipv6Addr, begin: host.constBegin(), end: host.constEnd())) |
37 | return false; |
38 | |
39 | // TODO: for now we do not test IPvFuture address, it must be addressed |
40 | // by introducing parseIpFuture (actually, there is an implementation |
41 | // in QUrl that can be adopted/modified/moved to QIPAddressUtils). |
42 | return true; |
43 | } |
44 | |
45 | void QHstsCache::(const QHttpHeaders &, |
46 | const QUrl &url) |
47 | { |
48 | if (!url.isValid()) |
49 | return; |
50 | |
51 | QHstsHeaderParser parser; |
52 | if (parser.parse(headers)) { |
53 | updateKnownHost(hostName: url.host(), expires: parser.expirationDate(), includeSubDomains: parser.includeSubDomains()); |
54 | #if QT_CONFIG(settings) |
55 | if (hstsStore) |
56 | hstsStore->synchronize(); |
57 | #endif // QT_CONFIG(settings) |
58 | } |
59 | } |
60 | |
61 | void QHstsCache::updateFromPolicies(const QList<QHstsPolicy> &policies) |
62 | { |
63 | for (const auto &policy : policies) |
64 | updateKnownHost(hostName: policy.host(), expires: policy.expiry(), includeSubDomains: policy.includesSubDomains()); |
65 | |
66 | #if QT_CONFIG(settings) |
67 | if (hstsStore && policies.size()) { |
68 | // These policies are coming either from store or from QNAM's setter |
69 | // function. As a result we can notice expired or new policies, time |
70 | // to sync ... |
71 | hstsStore->synchronize(); |
72 | } |
73 | #endif // QT_CONFIG(settings) |
74 | } |
75 | |
76 | void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires, |
77 | bool includeSubDomains) |
78 | { |
79 | if (!url.isValid()) |
80 | return; |
81 | |
82 | updateKnownHost(hostName: url.host(), expires, includeSubDomains); |
83 | #if QT_CONFIG(settings) |
84 | if (hstsStore) |
85 | hstsStore->synchronize(); |
86 | #endif // QT_CONFIG(settings) |
87 | } |
88 | |
89 | void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires, |
90 | bool includeSubDomains) |
91 | { |
92 | if (!is_valid_domain_name(host)) |
93 | return; |
94 | |
95 | // HSTS is a per-host policy, regardless of protocol, port or any of the other |
96 | // details in an URL; so we only want the host part. QUrl::host handles |
97 | // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10). |
98 | const HostName hostName(host); |
99 | const auto pos = knownHosts.find(x: hostName); |
100 | QHstsPolicy::PolicyFlags flags; |
101 | if (includeSubDomains) |
102 | flags = QHstsPolicy::IncludeSubDomains; |
103 | |
104 | const QHstsPolicy newPolicy(expires, flags, hostName.name); |
105 | if (pos == knownHosts.end()) { |
106 | // A new, previously unknown host. |
107 | if (newPolicy.isExpired()) { |
108 | // Nothing to do at all - we did not know this host previously, |
109 | // we do not have to - since its policy expired. |
110 | return; |
111 | } |
112 | |
113 | knownHosts.insert(x: {hostName, newPolicy}); |
114 | #if QT_CONFIG(settings) |
115 | if (hstsStore) |
116 | hstsStore->addToObserved(policy: newPolicy); |
117 | #endif // QT_CONFIG(settings) |
118 | return; |
119 | } |
120 | |
121 | if (newPolicy.isExpired()) |
122 | knownHosts.erase(position: pos); |
123 | else if (pos->second != newPolicy) |
124 | pos->second = newPolicy; |
125 | else |
126 | return; |
127 | |
128 | #if QT_CONFIG(settings) |
129 | if (hstsStore) |
130 | hstsStore->addToObserved(policy: newPolicy); |
131 | #endif // QT_CONFIG(settings) |
132 | } |
133 | |
134 | bool QHstsCache::isKnownHost(const QUrl &url) const |
135 | { |
136 | if (!url.isValid() || !is_valid_domain_name(host: url.host())) |
137 | return false; |
138 | |
139 | /* |
140 | RFC6797, 8.2. Known HSTS Host Domain Name Matching |
141 | |
142 | * Superdomain Match |
143 | If a label-for-label match between an entire Known HSTS Host's |
144 | domain name and a right-hand portion of the given domain name |
145 | is found, then this Known HSTS Host's domain name is a |
146 | superdomain match for the given domain name. There could be |
147 | multiple superdomain matches for a given domain name. |
148 | * Congruent Match |
149 | If a label-for-label match between a Known HSTS Host's domain |
150 | name and the given domain name is found -- i.e., there are no |
151 | further labels to compare -- then the given domain name |
152 | congruently matches this Known HSTS Host. |
153 | |
154 | We start from the congruent match, and then chop labels and dots and |
155 | proceed with superdomain match. While RFC6797 recommends to start from |
156 | superdomain, the result is the same - some valid policy will make a host |
157 | known. |
158 | */ |
159 | |
160 | bool superDomainMatch = false; |
161 | const QString hostNameAsString(url.host()); |
162 | HostName nameToTest(QStringView{hostNameAsString}); |
163 | while (nameToTest.fragment.size()) { |
164 | auto const pos = knownHosts.find(x: nameToTest); |
165 | if (pos != knownHosts.end()) { |
166 | if (pos->second.isExpired()) { |
167 | knownHosts.erase(position: pos); |
168 | #if QT_CONFIG(settings) |
169 | if (hstsStore) { |
170 | // Inform our store that this policy has expired. |
171 | hstsStore->addToObserved(policy: pos->second); |
172 | } |
173 | #endif // QT_CONFIG(settings) |
174 | } else if (!superDomainMatch || pos->second.includesSubDomains()) { |
175 | return true; |
176 | } |
177 | } |
178 | |
179 | const qsizetype dot = nameToTest.fragment.indexOf(c: u'.'); |
180 | if (dot == -1) |
181 | break; |
182 | |
183 | nameToTest.fragment = nameToTest.fragment.mid(pos: dot + 1); |
184 | superDomainMatch = true; |
185 | } |
186 | |
187 | return false; |
188 | } |
189 | |
190 | void QHstsCache::clear() |
191 | { |
192 | knownHosts.clear(); |
193 | } |
194 | |
195 | QList<QHstsPolicy> QHstsCache::policies() const |
196 | { |
197 | QList<QHstsPolicy> values; |
198 | values.reserve(asize: int(knownHosts.size())); |
199 | for (const auto &host : knownHosts) |
200 | values << host.second; |
201 | return values; |
202 | } |
203 | |
204 | #if QT_CONFIG(settings) |
205 | void QHstsCache::setStore(QHstsStore *store) |
206 | { |
207 | // Caller retains ownership of store, which must outlive this cache. |
208 | if (store != hstsStore) { |
209 | hstsStore = store; |
210 | |
211 | if (!hstsStore) |
212 | return; |
213 | |
214 | // First we augment our store with the policies we already know about |
215 | // (and thus the cached policy takes priority over whatever policy we |
216 | // had in the store for the same host, if any). |
217 | if (knownHosts.size()) { |
218 | const QList<QHstsPolicy> observed(policies()); |
219 | for (const auto &policy : observed) |
220 | hstsStore->addToObserved(policy); |
221 | hstsStore->synchronize(); |
222 | } |
223 | |
224 | // Now we update the cache with anything we have not observed yet, but |
225 | // the store knows about (well, it can happen we synchronize again as a |
226 | // result if some policies managed to expire or if we add a new one |
227 | // from the store to cache): |
228 | const QList<QHstsPolicy> restored(store->readPolicies()); |
229 | updateFromPolicies(policies: restored); |
230 | } |
231 | } |
232 | #endif // QT_CONFIG(settings) |
233 | |
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is
// found or we have no more bytes in the header.
241 | |
242 | // The following isXXX functions are based on RFC2616, 2.2 Basic Rules. |
243 | |
// CHAR = <any US-ASCII character (octets 0 - 127)>
//
// Negative values (e.g. a sign-extended high octet) are rejected: they wrap
// to large unsigned numbers in the comparison below.
static bool isCHAR(int c)
{
    return static_cast<unsigned>(c) <= 127u;
}
249 | |
// CTL = <any US-ASCII control character
//       (octets 0 - 31) and DEL (127)>
static bool isCTL(int c)
{
    if (c == 127)
        return true;
    return c >= 0 && c < 32;
}
256 | |
257 | |
// LWS = [CRLF] 1*( SP | HT )
//
// CRLF = CR LF
// CR   = <US-ASCII CR, carriage return (13)>
// LF   = <US-ASCII LF, linefeed (10)>
// SP   = <US-ASCII SP, space (32)>
// HT   = <US-ASCII HT, horizontal-tab (9)>
//
// By the time a header is parsed, CRLF sequences have already been replaced
// with spaces, so only SP and HT remain to be recognized here.
static bool isLWS(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
272 | |
273 | static bool isTEXT(char c) |
274 | { |
275 | // TEXT = <any OCTET except CTLs, |
276 | // but including LWS> |
277 | return !isCTL(c) || isLWS(c); |
278 | } |
279 | |
280 | static bool isSeparator(char c) |
281 | { |
282 | // separators = "(" | ")" | "<" | ">" | "@" |
283 | // | "," | ";" | ":" | "\" | <"> |
284 | // | "/" | "[" | "]" | "?" | "=" |
285 | // | "{" | "}" | SP | HT |
286 | static const char separators[] = "()<>@,;:\\\"/[]?={}" ; |
287 | static const char *end = separators + sizeof separators - 1; |
288 | return isLWS(c) || std::find(first: separators, last: end, val: c) != end; |
289 | } |
290 | |
291 | static QByteArrayView unescapeMaxAge(QByteArrayView value) |
292 | { |
293 | if (value.size() < 2 || value[0] != '"') |
294 | return value; |
295 | |
296 | Q_ASSERT(value[value.size() - 1] == '"'); |
297 | return value.mid(pos: 1, n: value.size() - 2); |
298 | } |
299 | |
300 | static bool isTOKEN(char c) |
301 | { |
302 | // token = 1*<any CHAR except CTLs or separators> |
303 | return isCHAR(c) && !isCTL(c) && !isSeparator(c); |
304 | } |
305 | |
306 | /* |
307 | |
308 | RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field. |
309 | Syntax: |
310 | |
Strict-Transport-Security = "Strict-Transport-Security" ":"
312 | [ directive ] *( ";" [ directive ] ) |
313 | |
314 | directive = directive-name [ "=" directive-value ] |
315 | directive-name = token |
316 | directive-value = token | quoted-string |
317 | |
318 | RFC 2616, 2.2 Basic Rules. |
319 | |
320 | token = 1*<any CHAR except CTLs or separators> |
321 | quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
322 | |
323 | |
324 | qdtext = <any TEXT except <">> |
325 | quoted-pair = "\" CHAR |
326 | |
327 | */ |
328 | |
329 | bool QHstsHeaderParser::(const QHttpHeaders &) |
330 | { |
331 | for (const auto &value : headers.values( |
332 | name: QHttpHeaders::WellKnownHeader::StrictTransportSecurity)) { |
333 | header = value; |
334 | // RFC6797, 8.1: |
335 | // |
336 | // The UA MUST ignore any STS header fields not conforming to the |
337 | // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP |
338 | // Response Header Field"). |
339 | // |
340 | // If a UA receives more than one STS header field in an HTTP |
341 | // response message over secure transport, then the UA MUST process |
342 | // only the first such header field. |
343 | // |
344 | // We read this as: ignore all invalid headers and take the first valid: |
345 | if (parseSTSHeader() && maxAgeFound) { |
346 | expiry = QDateTime::currentDateTimeUtc().addSecs(secs: maxAge); |
347 | return true; |
348 | } |
349 | } |
350 | |
351 | // In case it was set by a syntactically correct header (but without |
352 | // REQUIRED max-age directive): |
353 | subDomainsFound = false; |
354 | |
355 | return false; |
356 | } |
357 | |
358 | bool QHstsHeaderParser::() |
359 | { |
360 | expiry = QDateTime(); |
361 | maxAgeFound = false; |
362 | subDomainsFound = false; |
363 | maxAge = 0; |
364 | tokenPos = 0; |
365 | token.clear(); |
366 | |
367 | while (tokenPos < header.size()) { |
368 | if (!parseDirective()) |
369 | return false; |
370 | |
371 | if (token.size() && token != ";" ) { |
372 | // After a directive we can only have a ";" or no more tokens. |
373 | // Invalid syntax. |
374 | return false; |
375 | } |
376 | } |
377 | |
378 | return true; |
379 | } |
380 | |
381 | bool QHstsHeaderParser::() |
382 | { |
383 | // RFC 6797, 6.1: |
384 | // |
385 | // directive = directive-name [ "=" directive-value ] |
386 | // directive-name = token |
387 | // directive-value = token | quoted-string |
388 | |
389 | |
390 | // RFC 2616, 2.2: |
391 | // |
392 | // token = 1*<any CHAR except CTLs or separators> |
393 | |
394 | if (!nextToken()) |
395 | return false; |
396 | |
397 | if (!token.size()) // No more data, but no error. |
398 | return true; |
399 | |
400 | if (token == ";" ) // That's a weird grammar, but that's what it is. |
401 | return true; |
402 | |
403 | if (!isTOKEN(c: token.at(i: 0))) // Not a valid directive-name. |
404 | return false; |
405 | |
406 | const QByteArray directiveName = token; |
407 | // 2. Try to read "=" or ";". |
408 | if (!nextToken()) |
409 | return false; |
410 | |
411 | QByteArray directiveValue; |
412 | if (token == ";" ) // No directive-value |
413 | return processDirective(name: directiveName, value: directiveValue); |
414 | |
415 | if (token == "=" ) { |
416 | // We expect a directive-value now: |
417 | if (!nextToken() || !token.size()) |
418 | return false; |
419 | directiveValue = token; |
420 | } else if (token.size()) { |
421 | // Invalid syntax: |
422 | return false; |
423 | } |
424 | |
425 | if (!processDirective(name: directiveName, value: directiveValue)) |
426 | return false; |
427 | |
428 | // Read either ";", or 'end of header', or some invalid token. |
429 | return nextToken(); |
430 | } |
431 | |
432 | bool QHstsHeaderParser::(const QByteArray &name, const QByteArray &value) |
433 | { |
434 | Q_ASSERT(name.size()); |
435 | // RFC6797 6.1/3 Directive names are case-insensitive |
436 | if (name.compare(a: "max-age" , cs: Qt::CaseInsensitive) == 0) { |
437 | // RFC 6797, 6.1.1 |
438 | // The syntax of the max-age directive's REQUIRED value (after |
439 | // quoted-string unescaping, if necessary) is defined as: |
440 | // |
441 | // max-age-value = delta-seconds |
442 | if (maxAgeFound) { |
443 | // RFC 6797, 6.1/2: |
444 | // All directives MUST appear only once in an STS header field. |
445 | return false; |
446 | } |
447 | |
448 | const QByteArrayView unescapedValue = unescapeMaxAge(value); |
449 | if (!unescapedValue.size()) |
450 | return false; |
451 | |
452 | bool ok = false; |
453 | const qint64 age = unescapedValue.toLongLong(ok: &ok); |
454 | if (!ok || age < 0) |
455 | return false; |
456 | |
457 | maxAge = age; |
458 | maxAgeFound = true; |
459 | } else if (name.compare(a: "includesubdomains" , cs: Qt::CaseInsensitive) == 0) { |
460 | // RFC 6797, 6.1.2. The includeSubDomains Directive. |
461 | // The OPTIONAL "includeSubDomains" directive is a valueless directive. |
462 | |
463 | if (subDomainsFound) { |
464 | // RFC 6797, 6.1/2: |
465 | // All directives MUST appear only once in an STS header field. |
466 | return false; |
467 | } |
468 | |
469 | subDomainsFound = true; |
470 | } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5) |
471 | |
472 | return true; |
473 | } |
474 | |
475 | bool QHstsHeaderParser::() |
476 | { |
477 | // Returns true if we found a valid token or we have no more data (token is |
478 | // empty then). |
479 | |
480 | token.clear(); |
481 | |
482 | // Fortunately enough, by this point qhttpnetworkreply already got rid of |
483 | // [CRLF] parts, but we can have 1*(SP|HT) yet. |
484 | while (tokenPos < header.size() && isLWS(c: header.at(i: tokenPos))) |
485 | ++tokenPos; |
486 | |
487 | if (tokenPos == header.size()) |
488 | return true; |
489 | |
490 | const char ch = header.at(i: tokenPos); |
491 | if (ch == ';' || ch == '=') { |
492 | token.append(c: ch); |
493 | ++tokenPos; |
494 | return true; |
495 | } |
496 | |
497 | // RFC 2616, 2.2. |
498 | // |
499 | // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
500 | // qdtext = <any TEXT except <">> |
501 | if (ch == '"') { |
502 | int last = tokenPos + 1; |
503 | while (last < header.size()) { |
504 | if (header.at(i: last) == '"') { |
505 | // The end of a quoted-string. |
506 | break; |
507 | } else if (header.at(i: last) == '\\') { |
508 | // quoted-pair = "\" CHAR |
509 | if (last + 1 < header.size() && isCHAR(c: header.at(i: last + 1))) |
510 | last += 2; |
511 | else |
512 | return false; |
513 | } else { |
514 | if (!isTEXT(c: header.at(i: last))) |
515 | return false; |
516 | ++last; |
517 | } |
518 | } |
519 | |
520 | if (last >= header.size()) // no closing '"': |
521 | return false; |
522 | |
523 | token = header.mid(index: tokenPos, len: last - tokenPos + 1); |
524 | tokenPos = last + 1; |
525 | return true; |
526 | } |
527 | |
528 | // RFC 2616, 2.2: |
529 | // |
530 | // token = 1*<any CHAR except CTLs or separators> |
531 | if (!isTOKEN(c: ch)) |
532 | return false; |
533 | |
534 | int last = tokenPos + 1; |
535 | while (last < header.size() && isTOKEN(c: header.at(i: last))) |
536 | ++last; |
537 | |
538 | token = header.mid(index: tokenPos, len: last - tokenPos); |
539 | tokenPos = last; |
540 | |
541 | return true; |
542 | } |
543 | |
544 | QT_END_NAMESPACE |
545 | |