| 1 | /* |
| 2 | This file is part of the KDE project |
| 3 | SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org> |
| 4 | SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com> |
| 5 | SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org> |
| 6 | |
| 7 | Advanced web shortcuts: |
| 8 | SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at> |
| 9 | |
| 10 | SPDX-License-Identifier: GPL-2.0-or-later |
| 11 | */ |
| 12 | |
| 13 | #include "kuriikwsfiltereng_p.h" |
| 14 | #include "searchprovider.h" |
| 15 | |
| 16 | #include <KConfig> |
| 17 | #include <KConfigGroup> |
| 18 | #include <kprotocolinfo.h> |
| 19 | |
| 20 | #ifdef WITH_QTDBUS |
| 21 | #include <QDBusConnection> |
| 22 | #endif |
| 23 | |
| 24 | #include <QLoggingCategory> |
| 25 | #include <QRegularExpression> |
| 26 | #include <QStringEncoder> |
| 27 | |
| 28 | Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws" , QtWarningMsg) |
| 29 | using namespace KIO; |
| 30 | |
| 31 | /*! |
| 32 | * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator |
| 33 | * unit tests still pass (they're usually run as part of "make test"). |
| 34 | */ |
| 35 | |
| 36 | KURISearchFilterEngine::KURISearchFilterEngine() |
| 37 | { |
| 38 | configure(); |
| 39 | // Only after initial load, we would want to reparse the files on config changes. |
| 40 | // When the registry is constructed, it automatically loads the searchproviders |
| 41 | m_reloadRegistry = true; |
| 42 | |
| 43 | #ifdef WITH_QTDBUS |
| 44 | QDBusConnection::sessionBus() |
| 45 | .connect(service: QString(), QStringLiteral("/" ), QStringLiteral("org.kde.KUriFilterPlugin" ), QStringLiteral("configure" ), receiver: this, SLOT(configure())); |
| 46 | #endif |
| 47 | } |
| 48 | |
| 49 | KURISearchFilterEngine::~KURISearchFilterEngine() = default; |
| 50 | |
| 51 | // static |
| 52 | QStringList KURISearchFilterEngine::defaultSearchProviders() |
| 53 | { |
| 54 | static const QStringList defaultProviders{QStringLiteral("google" ), |
| 55 | QStringLiteral("youtube" ), |
| 56 | QStringLiteral("yahoo" ), |
| 57 | QStringLiteral("wikipedia" ), |
| 58 | QStringLiteral("wikit" )}; |
| 59 | return defaultProviders; |
| 60 | } |
| 61 | |
| 62 | SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const |
| 63 | { |
| 64 | const auto getProviderForKey = [this, &searchTerm](const QString &key) { |
| 65 | SearchProvider *provider = nullptr; |
| 66 | // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be |
| 67 | // the case if the delimiter is switched to space, see kiowidgets_space_separator_test |
| 68 | if (!key.isEmpty() && (key.contains(c: QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(protocol: key, updateCacheIfNotfound: false))) { |
| 69 | provider = m_registry.findByKey(key); |
| 70 | if (provider) { |
| 71 | if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(str: provider->desktopEntryName())) { |
| 72 | qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm; |
| 73 | } else { |
| 74 | provider = nullptr; |
| 75 | } |
| 76 | } |
| 77 | } |
| 78 | return provider; |
| 79 | }; |
| 80 | |
| 81 | SearchProvider *provider = nullptr; |
| 82 | if (m_bWebShortcutsEnabled) { |
| 83 | QString key; |
| 84 | if (typedString.contains(c: QLatin1Char('!'))) { |
| 85 | const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)" )); |
| 86 | const auto match = bangRegex.match(subject: typedString); |
| 87 | if (match.hasMatch() && match.lastCapturedIndex() == 1) { |
| 88 | key = match.captured(nth: 1); |
| 89 | searchTerm = QString(typedString).remove(re: bangRegex); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained |
| 94 | // in the query. To avoid not returning any results we check if we can find a provider for the key, if not |
| 95 | // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660 |
| 96 | if (!key.isEmpty()) { |
| 97 | provider = getProviderForKey(key); |
| 98 | if (!provider) { |
| 99 | key.clear(); |
| 100 | } |
| 101 | } |
| 102 | if (key.isEmpty()) { |
| 103 | const int pos = typedString.indexOf(ch: QLatin1Char(m_cKeywordDelimiter)); |
| 104 | if (pos > -1) { |
| 105 | key = typedString.left(n: pos).toLower(); // #169801 |
| 106 | searchTerm = typedString.mid(position: pos + 1); |
| 107 | } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') { |
| 108 | key = typedString; |
| 109 | searchTerm = typedString.mid(position: pos + 1); |
| 110 | } |
| 111 | provider = getProviderForKey(key); |
| 112 | } |
| 113 | |
| 114 | qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString; |
| 115 | } |
| 116 | |
| 117 | return provider; |
| 118 | } |
| 119 | |
| 120 | SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const |
| 121 | { |
| 122 | SearchProvider *provider = nullptr; |
| 123 | const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut); |
| 124 | |
| 125 | if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) { |
| 126 | // Make sure we ignore supported protocols, e.g. "smb:", "http:" |
| 127 | const int pos = typedString.indexOf(ch: QLatin1Char(':')); |
| 128 | |
| 129 | if (pos == -1 || !KProtocolInfo::isKnownProtocol(protocol: typedString.left(n: pos), updateCacheIfNotfound: false)) { |
| 130 | provider = m_registry.findByDesktopName(desktopName: defaultSearchProvider); |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | return provider; |
| 135 | } |
| 136 | |
| 137 | QByteArray KURISearchFilterEngine::name() const |
| 138 | { |
| 139 | return "kuriikwsfilter" ; |
| 140 | } |
| 141 | |
| 142 | char KURISearchFilterEngine::keywordDelimiter() const |
| 143 | { |
| 144 | return m_cKeywordDelimiter; |
| 145 | } |
| 146 | |
| 147 | QString KURISearchFilterEngine::defaultSearchEngine() const |
| 148 | { |
| 149 | return m_defaultWebShortcut; |
| 150 | } |
| 151 | |
| 152 | QStringList KURISearchFilterEngine::favoriteEngineList() const |
| 153 | { |
| 154 | return m_preferredWebShortcuts; |
| 155 | } |
| 156 | |
| 157 | KURISearchFilterEngine *KURISearchFilterEngine::self() |
| 158 | { |
| 159 | static KURISearchFilterEngine self; |
| 160 | return &self; |
| 161 | } |
| 162 | |
| 163 | QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const |
| 164 | { |
| 165 | // Returns the number of query words |
| 166 | QString userquery = query; |
| 167 | |
| 168 | // Do some pre-encoding, before we can start the work: |
| 169 | { |
| 170 | const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\"" )); |
| 171 | // Temporarily substitute spaces in quoted strings (" " -> "%20") |
| 172 | // Needed to split user query into StringList correctly. |
| 173 | int start = 0; |
| 174 | QRegularExpressionMatch match; |
| 175 | while ((match = qsexpr.match(subject: userquery, offset: start)).hasMatch()) { |
| 176 | QString str = match.captured(nth: 0); |
| 177 | str.replace(c: QLatin1Char(' '), after: QLatin1String("%20" )); |
| 178 | userquery.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: str); |
| 179 | start = match.capturedStart(nth: 0) + str.size(); // Move after last quote |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | // Split user query between spaces: |
| 184 | QStringList l = userquery.simplified().split(sep: QLatin1Char(' '), behavior: Qt::SkipEmptyParts); |
| 185 | |
| 186 | // Back-substitute quoted strings (%20 -> " "): |
| 187 | userquery.replace(before: QLatin1String("%20" ), after: QLatin1String(" " )); |
| 188 | l.replaceInStrings(QStringLiteral("%20" ), QStringLiteral(" " )); |
| 189 | |
| 190 | qCDebug(category) << "Generating substitution map:\n" ; |
| 191 | // Generate substitution map from user query: |
| 192 | for (int i = 0; i <= l.count(); i++) { |
| 193 | int pos = 0; |
| 194 | QString v; |
| 195 | |
| 196 | // Add whole user query (\{0}) to substitution map: |
| 197 | if (i == 0) { |
| 198 | v = userquery; |
| 199 | } |
| 200 | // Add partial user query items to substitution map: |
| 201 | else { |
| 202 | v = l[i - 1]; |
| 203 | } |
| 204 | |
| 205 | // Insert partial queries (referenced by \1 ... \n) to map: |
| 206 | map.insert(key: QString::number(i), value: v); |
| 207 | |
| 208 | // Insert named references (referenced by \name) to map: |
| 209 | if ((i > 0) && (pos = v.indexOf(ch: QLatin1Char('='))) > 0) { |
| 210 | QString s = v.mid(position: pos + 1); |
| 211 | QString k = v.left(n: pos); |
| 212 | |
| 213 | // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0') |
| 214 | s.replace(before: QLatin1String("%5C" ), after: QLatin1String("\\" )); |
| 215 | map.insert(key: k, value: s); |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | return l; |
| 220 | } |
| 221 | |
| 222 | static QString encodeString(const QString &s, QStringEncoder &codec) |
| 223 | { |
| 224 | // we encode all characters, including the space character BUG: 304276 |
| 225 | QByteArray encoded = QByteArray(codec.encode(str: s)).toPercentEncoding(); |
| 226 | return QString::fromUtf8(ba: encoded); |
| 227 | } |
| 228 | |
| 229 | QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const |
| 230 | { |
| 231 | QString newurl = url; |
| 232 | QStringList ql = modifySubstitutionMap(map, query: userquery); |
| 233 | const int count = ql.count(); |
| 234 | |
| 235 | // Substitute references (\{ref1,ref2,...}) with values from user query: |
| 236 | { |
| 237 | const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}" )); |
| 238 | // Substitute reflists (\{ref1,ref2,...}): |
| 239 | int start = 0; |
| 240 | QRegularExpressionMatch match; |
| 241 | while ((match = reflistRe.match(subject: newurl, offset: start)).hasMatch()) { |
| 242 | bool found = false; |
| 243 | |
| 244 | // bool rest = false; |
| 245 | QString v; |
| 246 | const QString rlstring = match.captured(nth: 1); |
| 247 | |
| 248 | // \{@} gets a special treatment later |
| 249 | if (rlstring == QLatin1String("@" )) { |
| 250 | v = QStringLiteral("\\@" ); |
| 251 | found = true; |
| 252 | } |
| 253 | |
| 254 | // TODO: strip whitespaces around commas |
| 255 | const QStringList refList = rlstring.split(sep: QLatin1Char(','), behavior: Qt::SkipEmptyParts); |
| 256 | |
| 257 | for (const QString &rlitem : refList) { |
| 258 | if (found) { |
| 259 | break; |
| 260 | } |
| 261 | |
| 262 | const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)" )); |
| 263 | const QRegularExpressionMatch rangeMatch = rangeRe.match(subject: rlitem); |
| 264 | // Substitute a range of keywords |
| 265 | if (rangeMatch.hasMatch()) { |
| 266 | int first = rangeMatch.captured(nth: 1).toInt(); |
| 267 | int last = rangeMatch.captured(nth: 2).toInt(); |
| 268 | |
| 269 | if (first == 0) { |
| 270 | first = 1; |
| 271 | } |
| 272 | |
| 273 | if (last == 0) { |
| 274 | last = count; |
| 275 | } |
| 276 | |
| 277 | for (int i = first; i <= last; i++) { |
| 278 | v += map[QString::number(i)] + QLatin1Char(' '); |
| 279 | // Remove used value from ql (needed for \{@}): |
| 280 | ql[i - 1].clear(); |
| 281 | } |
| 282 | |
| 283 | v = v.trimmed(); |
| 284 | if (!v.isEmpty()) { |
| 285 | found = true; |
| 286 | } |
| 287 | |
| 288 | v = encodeString(s: v, codec); |
| 289 | } else if (rlitem.startsWith(c: QLatin1Char('\"')) && rlitem.endsWith(c: QLatin1Char('\"'))) { |
| 290 | // Use default string from query definition: |
| 291 | found = true; |
| 292 | QString s = rlitem.mid(position: 1, n: rlitem.length() - 2); |
| 293 | v = encodeString(s, codec); |
| 294 | } else if (map.contains(key: rlitem)) { |
| 295 | // Use value from substitution map: |
| 296 | found = true; |
| 297 | v = encodeString(s: map[rlitem], codec); |
| 298 | |
| 299 | // Remove used value from ql (needed for \{@}): |
| 300 | const QChar c = rlitem.at(i: 0); // rlitem can't be empty at this point |
| 301 | if (c == QLatin1Char('0')) { |
| 302 | // It's a numeric reference to '0' |
| 303 | for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) { |
| 304 | (*it).clear(); |
| 305 | } |
| 306 | } else if ((c >= QLatin1String("0" )) && (c <= QLatin1String("9" ))) { // krazy:excludeall=doublequote_chars |
| 307 | // It's a numeric reference > '0' |
| 308 | int n = rlitem.toInt(); |
| 309 | ql[n - 1].clear(); |
| 310 | } else { |
| 311 | // It's a alphanumeric reference |
| 312 | QStringList::Iterator it = ql.begin(); |
| 313 | while ((it != ql.end()) && !it->startsWith(s: rlitem + QLatin1Char('='))) { |
| 314 | ++it; |
| 315 | } |
| 316 | if (it != ql.end()) { |
| 317 | it->clear(); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | // Encode '+', otherwise it would be interpreted as space in the resulting url: |
| 322 | v.replace(c: QLatin1Char('+'), after: QLatin1String("%2B" )); |
| 323 | } else if (rlitem == QLatin1String("@" )) { |
| 324 | v = QStringLiteral("\\@" ); |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | newurl.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: v); |
| 329 | start = match.capturedStart(nth: 0) + v.size(); |
| 330 | } |
| 331 | |
| 332 | // Special handling for \{@}; |
| 333 | { |
| 334 | // Generate list of unmatched strings: |
| 335 | QString v = ql.join(sep: QLatin1Char(' ')).simplified(); |
| 336 | v = encodeString(s: v, codec); |
| 337 | |
| 338 | // Substitute \{@} with list of unmatched query strings |
| 339 | newurl.replace(before: QLatin1String("\\@" ), after: v); |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | return newurl; |
| 344 | } |
| 345 | |
| 346 | QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const |
| 347 | { |
| 348 | SubstMap map; |
| 349 | return formatResult(url, cset1, cset2, query, isMalformed, map); |
| 350 | } |
| 351 | |
| 352 | QUrl KURISearchFilterEngine::formatResult(const QString &url, |
| 353 | const QString &cset1, |
| 354 | const QString &cset2, |
| 355 | const QString &userquery, |
| 356 | bool /* isMalformed */, |
| 357 | SubstMap &map) const |
| 358 | { |
| 359 | // Return nothing if userquery is empty and it contains |
| 360 | // substitution strings... |
| 361 | if (userquery.isEmpty() && url.indexOf(s: QLatin1String("\\{" )) > 0) { |
| 362 | return QUrl(); |
| 363 | } |
| 364 | |
| 365 | // Create a codec for the desired encoding so that we can transcode the user's "url". |
| 366 | QString cseta = cset1; |
| 367 | if (cseta.isEmpty()) { |
| 368 | cseta = QStringLiteral("UTF-8" ); |
| 369 | } |
| 370 | |
| 371 | QStringEncoder csetacodec(cseta.toLatin1().constData()); |
| 372 | if (!csetacodec.isValid()) { |
| 373 | cseta = QStringLiteral("UTF-8" ); |
| 374 | csetacodec = QStringEncoder(QStringEncoder::Utf8); |
| 375 | } |
| 376 | |
| 377 | // Add charset indicator for the query to substitution map: |
| 378 | map.insert(QStringLiteral("ikw_charset" ), value: cseta); |
| 379 | |
| 380 | // Add charset indicator for the fallback query to substitution map: |
| 381 | QString csetb = cset2; |
| 382 | if (csetb.isEmpty()) { |
| 383 | csetb = QStringLiteral("UTF-8" ); |
| 384 | } |
| 385 | map.insert(QStringLiteral("wsc_charset" ), value: csetb); |
| 386 | |
| 387 | QString newurl = substituteQuery(url, map, userquery, codec&: csetacodec); |
| 388 | |
| 389 | return QUrl(newurl, QUrl::StrictMode); |
| 390 | } |
| 391 | |
| 392 | void KURISearchFilterEngine::configure() |
| 393 | { |
| 394 | qCDebug(category) << "Keywords Engine: Loading config..." ; |
| 395 | |
| 396 | // Load the config. |
| 397 | KConfig config(QString::fromUtf8(ba: name()) + QLatin1String("rc" ), KConfig::NoGlobals); |
| 398 | KConfigGroup group = config.group(QStringLiteral("General" )); |
| 399 | |
| 400 | m_cKeywordDelimiter = group.readEntry(key: "KeywordDelimiter" , aDefault: ":" ).at(i: 0).toLatin1(); |
| 401 | m_bWebShortcutsEnabled = group.readEntry(key: "EnableWebShortcuts" , defaultValue: true); |
| 402 | m_defaultWebShortcut = group.readEntry(key: "DefaultWebShortcut" , aDefault: "duckduckgo" ); |
| 403 | m_bUseOnlyPreferredWebShortcuts = group.readEntry(key: "UsePreferredWebShortcutsOnly" , defaultValue: false); |
| 404 | |
| 405 | QStringList defaultPreferredShortcuts; |
| 406 | if (!group.hasKey(key: "PreferredWebShortcuts" )) { |
| 407 | defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders(); |
| 408 | } |
| 409 | m_preferredWebShortcuts = group.readEntry(key: "PreferredWebShortcuts" , aDefault: defaultPreferredShortcuts); |
| 410 | |
| 411 | // Use either a white space or a : as the keyword delimiter... |
| 412 | if (strchr(s: " :" , c: m_cKeywordDelimiter) == nullptr) { |
| 413 | m_cKeywordDelimiter = ':'; |
| 414 | } |
| 415 | |
| 416 | qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled; |
| 417 | qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut; |
| 418 | qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter; |
| 419 | if (m_reloadRegistry) { |
| 420 | m_registry.reload(); |
| 421 | } |
| 422 | } |
| 423 | |
| 424 | SearchProviderRegistry *KURISearchFilterEngine::registry() |
| 425 | { |
| 426 | return &m_registry; |
| 427 | } |
| 428 | |
| 429 | #include "moc_kuriikwsfiltereng_p.cpp" |
| 430 | |