1 | // Copyright (C) 2017 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qhsts_p.h" |
5 | |
6 | #include "QtCore/private/qipaddress_p.h" |
7 | #include "QtCore/qlist.h" |
8 | |
9 | #if QT_CONFIG(settings) |
10 | #include "qhstsstore_p.h" |
11 | #endif // QT_CONFIG(settings) |
12 | |
13 | QT_BEGIN_NAMESPACE |
14 | |
15 | static bool is_valid_domain_name(const QString &host) |
16 | { |
17 | if (!host.size()) |
18 | return false; |
19 | |
20 | // RFC6797 8.1.1 |
21 | // If the substring matching the host production from the Request-URI |
22 | // (of the message to which the host responded) syntactically matches |
23 | //the IP-literal or IPv4address productions from Section 3.2.2 of |
24 | //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host. |
25 | using namespace QIPAddressUtils; |
26 | |
27 | IPv4Address ipv4Addr = {}; |
28 | if (parseIp4(address&: ipv4Addr, begin: host.constBegin(), end: host.constEnd())) |
29 | return false; |
30 | |
31 | IPv6Address ipv6Addr = {}; |
32 | // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6 |
33 | // address successfully. |
34 | if (!parseIp6(address&: ipv6Addr, begin: host.constBegin(), end: host.constEnd())) |
35 | return false; |
36 | |
37 | // TODO: for now we do not test IPvFuture address, it must be addressed |
38 | // by introducing parseIpFuture (actually, there is an implementation |
39 | // in QUrl that can be adopted/modified/moved to QIPAddressUtils). |
40 | return true; |
41 | } |
42 | |
43 | void QHstsCache::(const QList<QPair<QByteArray, QByteArray>> &, |
44 | const QUrl &url) |
45 | { |
46 | if (!url.isValid()) |
47 | return; |
48 | |
49 | QHstsHeaderParser parser; |
50 | if (parser.parse(headers)) { |
51 | updateKnownHost(hostName: url.host(), expires: parser.expirationDate(), includeSubDomains: parser.includeSubDomains()); |
52 | #if QT_CONFIG(settings) |
53 | if (hstsStore) |
54 | hstsStore->synchronize(); |
55 | #endif // QT_CONFIG(settings) |
56 | } |
57 | } |
58 | |
59 | void QHstsCache::updateFromPolicies(const QList<QHstsPolicy> &policies) |
60 | { |
61 | for (const auto &policy : policies) |
62 | updateKnownHost(hostName: policy.host(), expires: policy.expiry(), includeSubDomains: policy.includesSubDomains()); |
63 | |
64 | #if QT_CONFIG(settings) |
65 | if (hstsStore && policies.size()) { |
66 | // These policies are coming either from store or from QNAM's setter |
67 | // function. As a result we can notice expired or new policies, time |
68 | // to sync ... |
69 | hstsStore->synchronize(); |
70 | } |
71 | #endif // QT_CONFIG(settings) |
72 | } |
73 | |
74 | void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires, |
75 | bool includeSubDomains) |
76 | { |
77 | if (!url.isValid()) |
78 | return; |
79 | |
80 | updateKnownHost(hostName: url.host(), expires, includeSubDomains); |
81 | #if QT_CONFIG(settings) |
82 | if (hstsStore) |
83 | hstsStore->synchronize(); |
84 | #endif // QT_CONFIG(settings) |
85 | } |
86 | |
87 | void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires, |
88 | bool includeSubDomains) |
89 | { |
90 | if (!is_valid_domain_name(host)) |
91 | return; |
92 | |
93 | // HSTS is a per-host policy, regardless of protocol, port or any of the other |
94 | // details in an URL; so we only want the host part. QUrl::host handles |
95 | // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10). |
96 | const HostName hostName(host); |
97 | const auto pos = knownHosts.find(x: hostName); |
98 | QHstsPolicy::PolicyFlags flags; |
99 | if (includeSubDomains) |
100 | flags = QHstsPolicy::IncludeSubDomains; |
101 | |
102 | const QHstsPolicy newPolicy(expires, flags, hostName.name); |
103 | if (pos == knownHosts.end()) { |
104 | // A new, previously unknown host. |
105 | if (newPolicy.isExpired()) { |
106 | // Nothing to do at all - we did not know this host previously, |
107 | // we do not have to - since its policy expired. |
108 | return; |
109 | } |
110 | |
111 | knownHosts.insert(x: {hostName, newPolicy}); |
112 | #if QT_CONFIG(settings) |
113 | if (hstsStore) |
114 | hstsStore->addToObserved(policy: newPolicy); |
115 | #endif // QT_CONFIG(settings) |
116 | return; |
117 | } |
118 | |
119 | if (newPolicy.isExpired()) |
120 | knownHosts.erase(position: pos); |
121 | else if (pos->second != newPolicy) |
122 | pos->second = newPolicy; |
123 | else |
124 | return; |
125 | |
126 | #if QT_CONFIG(settings) |
127 | if (hstsStore) |
128 | hstsStore->addToObserved(policy: newPolicy); |
129 | #endif // QT_CONFIG(settings) |
130 | } |
131 | |
132 | bool QHstsCache::isKnownHost(const QUrl &url) const |
133 | { |
134 | if (!url.isValid() || !is_valid_domain_name(host: url.host())) |
135 | return false; |
136 | |
137 | /* |
138 | RFC6797, 8.2. Known HSTS Host Domain Name Matching |
139 | |
140 | * Superdomain Match |
141 | If a label-for-label match between an entire Known HSTS Host's |
142 | domain name and a right-hand portion of the given domain name |
143 | is found, then this Known HSTS Host's domain name is a |
144 | superdomain match for the given domain name. There could be |
145 | multiple superdomain matches for a given domain name. |
146 | * Congruent Match |
147 | If a label-for-label match between a Known HSTS Host's domain |
148 | name and the given domain name is found -- i.e., there are no |
149 | further labels to compare -- then the given domain name |
150 | congruently matches this Known HSTS Host. |
151 | |
152 | We start from the congruent match, and then chop labels and dots and |
153 | proceed with superdomain match. While RFC6797 recommends to start from |
154 | superdomain, the result is the same - some valid policy will make a host |
155 | known. |
156 | */ |
157 | |
158 | bool superDomainMatch = false; |
159 | const QString hostNameAsString(url.host()); |
160 | HostName nameToTest(QStringView{hostNameAsString}); |
161 | while (nameToTest.fragment.size()) { |
162 | auto const pos = knownHosts.find(x: nameToTest); |
163 | if (pos != knownHosts.end()) { |
164 | if (pos->second.isExpired()) { |
165 | knownHosts.erase(position: pos); |
166 | #if QT_CONFIG(settings) |
167 | if (hstsStore) { |
168 | // Inform our store that this policy has expired. |
169 | hstsStore->addToObserved(policy: pos->second); |
170 | } |
171 | #endif // QT_CONFIG(settings) |
172 | } else if (!superDomainMatch || pos->second.includesSubDomains()) { |
173 | return true; |
174 | } |
175 | } |
176 | |
177 | const qsizetype dot = nameToTest.fragment.indexOf(c: u'.'); |
178 | if (dot == -1) |
179 | break; |
180 | |
181 | nameToTest.fragment = nameToTest.fragment.mid(pos: dot + 1); |
182 | superDomainMatch = true; |
183 | } |
184 | |
185 | return false; |
186 | } |
187 | |
188 | void QHstsCache::clear() |
189 | { |
190 | knownHosts.clear(); |
191 | } |
192 | |
193 | QList<QHstsPolicy> QHstsCache::policies() const |
194 | { |
195 | QList<QHstsPolicy> values; |
196 | values.reserve(asize: int(knownHosts.size())); |
197 | for (const auto &host : knownHosts) |
198 | values << host.second; |
199 | return values; |
200 | } |
201 | |
202 | #if QT_CONFIG(settings) |
203 | void QHstsCache::setStore(QHstsStore *store) |
204 | { |
205 | // Caller retains ownership of store, which must outlive this cache. |
206 | if (store != hstsStore) { |
207 | hstsStore = store; |
208 | |
209 | if (!hstsStore) |
210 | return; |
211 | |
212 | // First we augment our store with the policies we already know about |
213 | // (and thus the cached policy takes priority over whatever policy we |
214 | // had in the store for the same host, if any). |
215 | if (knownHosts.size()) { |
216 | const QList<QHstsPolicy> observed(policies()); |
217 | for (const auto &policy : observed) |
218 | hstsStore->addToObserved(policy); |
219 | hstsStore->synchronize(); |
220 | } |
221 | |
222 | // Now we update the cache with anything we have not observed yet, but |
223 | // the store knows about (well, it can happen we synchronize again as a |
224 | // result if some policies managed to expire or if we add a new one |
225 | // from the store to cache): |
226 | const QList<QHstsPolicy> restored(store->readPolicies()); |
227 | updateFromPolicies(policies: restored); |
228 | } |
229 | } |
230 | #endif // QT_CONFIG(settings) |
231 | |
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is found
// or we have no more bytes in the header.
239 | |
240 | // The following isXXX functions are based on RFC2616, 2.2 Basic Rules. |
241 | |
// CHAR = <any US-ASCII character (octets 0 - 127)>
static bool isCHAR(int c)
{
    // A negative value wraps to a large unsigned number, so a single
    // comparison covers both ends of the 0..127 range.
    return static_cast<unsigned int>(c) <= 127u;
}
247 | |
// CTL = <any US-ASCII control character
//        (octets 0 - 31) and DEL (127)>
static bool isCTL(int c)
{
    if (c == 127) // DEL
        return true;

    return c >= 0 && c < 32;
}
254 | |
255 | |
// Returns true for the linear white space characters that can still be
// present in a header value at this point: SP and HT.
static bool isLWS(int c)
{
    // LWS = [CRLF] 1*( SP | HT )
    //
    // CRLF = CR LF
    // CR = <US-ASCII CR, carriage return (13)>
    // LF = <US-ASCII LF, linefeed (10)>
    // SP = <US-ASCII SP, space (32)>
    // HT = <US-ASCII HT, horizontal-tab (9)>
    //
    // CRLF is handled by the time we parse a header (they were replaced with
    // spaces). We only have to deal with remaining SP|HT
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
270 | |
271 | static bool isTEXT(char c) |
272 | { |
273 | // TEXT = <any OCTET except CTLs, |
274 | // but including LWS> |
275 | return !isCTL(c) || isLWS(c); |
276 | } |
277 | |
278 | static bool isSeparator(char c) |
279 | { |
280 | // separators = "(" | ")" | "<" | ">" | "@" |
281 | // | "," | ";" | ":" | "\" | <"> |
282 | // | "/" | "[" | "]" | "?" | "=" |
283 | // | "{" | "}" | SP | HT |
284 | static const char separators[] = "()<>@,;:\\\"/[]?={}" ; |
285 | static const char *end = separators + sizeof separators - 1; |
286 | return isLWS(c) || std::find(first: separators, last: end, val: c) != end; |
287 | } |
288 | |
289 | static QByteArray unescapeMaxAge(const QByteArray &value) |
290 | { |
291 | if (value.size() < 2 || value[0] != '"') |
292 | return value; |
293 | |
294 | Q_ASSERT(value[value.size() - 1] == '"'); |
295 | return value.mid(index: 1, len: value.size() - 2); |
296 | } |
297 | |
298 | static bool isTOKEN(char c) |
299 | { |
300 | // token = 1*<any CHAR except CTLs or separators> |
301 | return isCHAR(c) && !isCTL(c) && !isSeparator(c); |
302 | } |
303 | |
304 | /* |
305 | |
306 | RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field. |
307 | Syntax: |
308 | |
    Strict-Transport-Security = "Strict-Transport-Security" ":"
310 | [ directive ] *( ";" [ directive ] ) |
311 | |
312 | directive = directive-name [ "=" directive-value ] |
313 | directive-name = token |
314 | directive-value = token | quoted-string |
315 | |
316 | RFC 2616, 2.2 Basic Rules. |
317 | |
318 | token = 1*<any CHAR except CTLs or separators> |
319 | quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
320 | |
321 | |
322 | qdtext = <any TEXT except <">> |
323 | quoted-pair = "\" CHAR |
324 | |
325 | */ |
326 | |
327 | bool QHstsHeaderParser::(const QList<QPair<QByteArray, QByteArray>> &) |
328 | { |
329 | for (const auto &h : headers) { |
330 | // We compare directly because header name was already 'trimmed' for us: |
331 | if (h.first.compare(a: "Strict-Transport-Security" , cs: Qt::CaseInsensitive) == 0) { |
332 | header = h.second; |
333 | // RFC6797, 8.1: |
334 | // |
335 | // The UA MUST ignore any STS header fields not conforming to the |
336 | // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP |
337 | // Response Header Field"). |
338 | // |
339 | // If a UA receives more than one STS header field in an HTTP |
340 | // response message over secure transport, then the UA MUST process |
341 | // only the first such header field. |
342 | // |
343 | // We read this as: ignore all invalid headers and take the first valid: |
344 | if (parseSTSHeader() && maxAgeFound) { |
345 | expiry = QDateTime::currentDateTimeUtc().addSecs(secs: maxAge); |
346 | return true; |
347 | } |
348 | } |
349 | } |
350 | |
351 | // In case it was set by a syntactically correct header (but without |
352 | // REQUIRED max-age directive): |
353 | subDomainsFound = false; |
354 | |
355 | return false; |
356 | } |
357 | |
358 | bool QHstsHeaderParser::() |
359 | { |
360 | expiry = QDateTime(); |
361 | maxAgeFound = false; |
362 | subDomainsFound = false; |
363 | maxAge = 0; |
364 | tokenPos = 0; |
365 | token.clear(); |
366 | |
367 | while (tokenPos < header.size()) { |
368 | if (!parseDirective()) |
369 | return false; |
370 | |
371 | if (token.size() && token != ";" ) { |
372 | // After a directive we can only have a ";" or no more tokens. |
373 | // Invalid syntax. |
374 | return false; |
375 | } |
376 | } |
377 | |
378 | return true; |
379 | } |
380 | |
381 | bool QHstsHeaderParser::() |
382 | { |
383 | // RFC 6797, 6.1: |
384 | // |
385 | // directive = directive-name [ "=" directive-value ] |
386 | // directive-name = token |
387 | // directive-value = token | quoted-string |
388 | |
389 | |
390 | // RFC 2616, 2.2: |
391 | // |
392 | // token = 1*<any CHAR except CTLs or separators> |
393 | |
394 | if (!nextToken()) |
395 | return false; |
396 | |
397 | if (!token.size()) // No more data, but no error. |
398 | return true; |
399 | |
400 | if (token == ";" ) // That's a weird grammar, but that's what it is. |
401 | return true; |
402 | |
403 | if (!isTOKEN(c: token[0])) // Not a valid directive-name. |
404 | return false; |
405 | |
406 | const QByteArray directiveName = token; |
407 | // 2. Try to read "=" or ";". |
408 | if (!nextToken()) |
409 | return false; |
410 | |
411 | QByteArray directiveValue; |
412 | if (token == ";" ) // No directive-value |
413 | return processDirective(name: directiveName, value: directiveValue); |
414 | |
415 | if (token == "=" ) { |
416 | // We expect a directive-value now: |
417 | if (!nextToken() || !token.size()) |
418 | return false; |
419 | directiveValue = token; |
420 | } else if (token.size()) { |
421 | // Invalid syntax: |
422 | return false; |
423 | } |
424 | |
425 | if (!processDirective(name: directiveName, value: directiveValue)) |
426 | return false; |
427 | |
428 | // Read either ";", or 'end of header', or some invalid token. |
429 | return nextToken(); |
430 | } |
431 | |
432 | bool QHstsHeaderParser::(const QByteArray &name, const QByteArray &value) |
433 | { |
434 | Q_ASSERT(name.size()); |
435 | // RFC6797 6.1/3 Directive names are case-insensitive |
436 | if (name.compare(a: "max-age" , cs: Qt::CaseInsensitive) == 0) { |
437 | // RFC 6797, 6.1.1 |
438 | // The syntax of the max-age directive's REQUIRED value (after |
439 | // quoted-string unescaping, if necessary) is defined as: |
440 | // |
441 | // max-age-value = delta-seconds |
442 | if (maxAgeFound) { |
443 | // RFC 6797, 6.1/2: |
444 | // All directives MUST appear only once in an STS header field. |
445 | return false; |
446 | } |
447 | |
448 | const QByteArray unescapedValue = unescapeMaxAge(value); |
449 | if (!unescapedValue.size()) |
450 | return false; |
451 | |
452 | bool ok = false; |
453 | const qint64 age = unescapedValue.toLongLong(ok: &ok); |
454 | if (!ok || age < 0) |
455 | return false; |
456 | |
457 | maxAge = age; |
458 | maxAgeFound = true; |
459 | } else if (name.compare(a: "includesubdomains" , cs: Qt::CaseInsensitive) == 0) { |
460 | // RFC 6797, 6.1.2. The includeSubDomains Directive. |
461 | // The OPTIONAL "includeSubDomains" directive is a valueless directive. |
462 | |
463 | if (subDomainsFound) { |
464 | // RFC 6797, 6.1/2: |
465 | // All directives MUST appear only once in an STS header field. |
466 | return false; |
467 | } |
468 | |
469 | subDomainsFound = true; |
470 | } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5) |
471 | |
472 | return true; |
473 | } |
474 | |
475 | bool QHstsHeaderParser::() |
476 | { |
477 | // Returns true if we found a valid token or we have no more data (token is |
478 | // empty then). |
479 | |
480 | token.clear(); |
481 | |
482 | // Fortunately enough, by this point qhttpnetworkreply already got rid of |
483 | // [CRLF] parts, but we can have 1*(SP|HT) yet. |
484 | while (tokenPos < header.size() && isLWS(c: header[tokenPos])) |
485 | ++tokenPos; |
486 | |
487 | if (tokenPos == header.size()) |
488 | return true; |
489 | |
490 | const char ch = header[tokenPos]; |
491 | if (ch == ';' || ch == '=') { |
492 | token.append(c: ch); |
493 | ++tokenPos; |
494 | return true; |
495 | } |
496 | |
497 | // RFC 2616, 2.2. |
498 | // |
499 | // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
500 | // qdtext = <any TEXT except <">> |
501 | if (ch == '"') { |
502 | int last = tokenPos + 1; |
503 | while (last < header.size()) { |
504 | if (header[last] == '"') { |
505 | // The end of a quoted-string. |
506 | break; |
507 | } else if (header[last] == '\\') { |
508 | // quoted-pair = "\" CHAR |
509 | if (last + 1 < header.size() && isCHAR(c: header[last + 1])) |
510 | last += 2; |
511 | else |
512 | return false; |
513 | } else { |
514 | if (!isTEXT(c: header[last])) |
515 | return false; |
516 | ++last; |
517 | } |
518 | } |
519 | |
520 | if (last >= header.size()) // no closing '"': |
521 | return false; |
522 | |
523 | token = header.mid(index: tokenPos, len: last - tokenPos + 1); |
524 | tokenPos = last + 1; |
525 | return true; |
526 | } |
527 | |
528 | // RFC 2616, 2.2: |
529 | // |
530 | // token = 1*<any CHAR except CTLs or separators> |
531 | if (!isTOKEN(c: ch)) |
532 | return false; |
533 | |
534 | int last = tokenPos + 1; |
535 | while (last < header.size() && isTOKEN(c: header[last])) |
536 | ++last; |
537 | |
538 | token = header.mid(index: tokenPos, len: last - tokenPos); |
539 | tokenPos = last; |
540 | |
541 | return true; |
542 | } |
543 | |
544 | QT_END_NAMESPACE |
545 | |