| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtCore module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | #include <qglobal.h> |
| 41 | |
| 42 | #if QT_CONFIG(topleveldomain) |
| 43 | |
| 44 | #include "qplatformdefs.h" |
| 45 | #include "qurl.h" |
| 46 | #include "private/qurltlds_p.h" |
| 47 | #include "private/qtldurl_p.h" |
| 48 | #include "QtCore/qstring.h" |
| 49 | #include "QtCore/qvector.h" |
| 50 | |
| 51 | QT_BEGIN_NAMESPACE |
| 52 | |
// Kind of public-suffix rule to look for. The numeric values are used as
// indices into the marker table in containsTLDEntry(), so they must stay
// dense and start at zero.
enum TLDMatchType {
    ExactMatch = 0,     // rule stored without a marker
    SuffixMatch = 1,    // rule stored with a leading '*'
    ExceptionMatch = 2, // rule stored with a leading '!'
};
| 58 | |
| 59 | // Scan the auto-generated table of TLDs for an entry. For more details |
| 60 | // see comments in file: util/corelib/qurl-generateTLDs/main.cpp |
| 61 | static bool containsTLDEntry(QStringView entry, TLDMatchType match) |
| 62 | { |
| 63 | const QStringView matchSymbols[] = { |
| 64 | u"" , |
| 65 | u"*" , |
| 66 | u"!" , |
| 67 | }; |
| 68 | const auto symbol = matchSymbols[match]; |
| 69 | const int index = qt_hash(key: entry, chained: qt_hash(key: symbol)) % tldCount; |
| 70 | |
| 71 | // select the right chunk from the big table |
| 72 | short chunk = 0; |
| 73 | uint chunkIndex = tldIndices[index], offset = 0; |
| 74 | |
| 75 | // The offset in the big string, of the group that our entry hashes into. |
| 76 | const auto tldGroupOffset = tldIndices[index]; |
| 77 | |
| 78 | // It should always be inside all chunks' total size. |
| 79 | Q_ASSERT(tldGroupOffset < tldChunks[tldChunkCount - 1]); |
| 80 | // All offsets are stored in non-decreasing order. |
| 81 | // This check is within bounds as tldIndices has length tldCount+1. |
| 82 | Q_ASSERT(tldGroupOffset <= tldIndices[index + 1]); |
| 83 | // The last extra entry in tldIndices |
| 84 | // should be equal to the total of all chunks' lengths. |
| 85 | Q_ASSERT(tldIndices[tldCount] == tldChunks[tldChunkCount - 1]); |
| 86 | |
| 87 | // Find which chunk contains the tldGroupOffset |
| 88 | while (tldGroupOffset >= tldChunks[chunk]) { |
| 89 | chunkIndex -= tldChunks[chunk]; |
| 90 | offset += tldChunks[chunk]; |
| 91 | chunk++; |
| 92 | |
| 93 | // We can not go above the number of chunks we have, since all our |
| 94 | // indices are less than the total chunks' size (see asserts above). |
| 95 | Q_ASSERT(chunk < tldChunkCount); |
| 96 | } |
| 97 | |
| 98 | // check all the entries from the given offset |
| 99 | while (chunkIndex < tldIndices[index+1] - offset) { |
| 100 | const auto utf8 = tldData[chunk] + chunkIndex; |
| 101 | if ((symbol.isEmpty() || QLatin1Char(*utf8) == symbol) && entry == QString::fromUtf8(str: utf8 + symbol.size())) |
| 102 | return true; |
| 103 | chunkIndex += qstrlen(str: utf8) + 1; // +1 for the ending \0 |
| 104 | } |
| 105 | return false; |
| 106 | } |
| 107 | |
| 108 | /*! |
| 109 | \internal |
| 110 | |
| 111 | Return the top-level-domain per Qt's copy of the Mozilla public suffix list of |
| 112 | \a domain. |
| 113 | */ |
| 114 | |
| 115 | Q_CORE_EXPORT QString qTopLevelDomain(const QString &domain) |
| 116 | { |
| 117 | const QString domainLower = domain.toLower(); |
| 118 | QVector<QStringRef> sections = domainLower.splitRef(sep: QLatin1Char('.'), behavior: Qt::SkipEmptyParts); |
| 119 | if (sections.isEmpty()) |
| 120 | return QString(); |
| 121 | |
| 122 | QString level, tld; |
| 123 | for (int j = sections.count() - 1; j >= 0; --j) { |
| 124 | level.prepend(s: QLatin1Char('.') + sections.at(i: j)); |
| 125 | if (qIsEffectiveTLD(domain: level.rightRef(n: level.size() - 1))) |
| 126 | tld = level; |
| 127 | } |
| 128 | return tld; |
| 129 | } |
| 130 | |
| 131 | /*! |
| 132 | \internal |
| 133 | |
| 134 | Return true if \a domain is a top-level-domain per Qt's copy of the Mozilla public suffix list. |
| 135 | */ |
| 136 | |
| 137 | Q_CORE_EXPORT bool qIsEffectiveTLD(const QStringRef &domain) |
| 138 | { |
| 139 | // for domain 'foo.bar.com': |
| 140 | // 1. return if TLD table contains 'foo.bar.com' |
| 141 | // 2. else if table contains '*.bar.com', |
| 142 | // 3. test that table does not contain '!foo.bar.com' |
| 143 | |
| 144 | if (containsTLDEntry(entry: domain, match: ExactMatch)) // 1 |
| 145 | return true; |
| 146 | |
| 147 | const int dot = domain.indexOf(ch: QLatin1Char('.')); |
| 148 | if (dot < 0) // Actual TLD: may be effective if the subject of a wildcard rule: |
| 149 | return containsTLDEntry(entry: QString(QLatin1Char('.') + domain), match: SuffixMatch); |
| 150 | if (containsTLDEntry(entry: domain.mid(pos: dot), match: SuffixMatch)) // 2 |
| 151 | return !containsTLDEntry(entry: domain, match: ExceptionMatch); // 3 |
| 152 | return false; |
| 153 | } |
| 154 | |
| 155 | QT_END_NAMESPACE |
| 156 | |
| 157 | #endif |
| 158 | |