1/*
2 This file is part of the KDE project
3 SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org>
4 SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com>
5 SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org>
6
7 Advanced web shortcuts:
8 SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at>
9
10 SPDX-License-Identifier: GPL-2.0-or-later
11*/
12
13#include "kuriikwsfiltereng_p.h"
14#include "searchprovider.h"
15
16#include <KConfig>
17#include <KConfigGroup>
18#include <kprotocolinfo.h>
19
20#include <QDBusConnection>
21#include <QLoggingCategory>
22#include <QRegularExpression>
23#include <QStringEncoder>
24
25Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg)
26using namespace KIO;
27
28/**
29 * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator
30 * unit tests still pass (they're usually run as part of "make test").
31 */
32
33KURISearchFilterEngine::KURISearchFilterEngine()
34{
35 configure();
36 // Only after initial load, we would want to reparse the files on config changes.
37 // When the registry is constructed, it automatically loads the searchproviders
38 m_reloadRegistry = true;
39 QDBusConnection::sessionBus()
40 .connect(service: QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), receiver: this, SLOT(configure()));
41}
42
43KURISearchFilterEngine::~KURISearchFilterEngine() = default;
44
45// static
46QStringList KURISearchFilterEngine::defaultSearchProviders()
47{
48 static const QStringList defaultProviders{QStringLiteral("google"),
49 QStringLiteral("youtube"),
50 QStringLiteral("yahoo"),
51 QStringLiteral("wikipedia"),
52 QStringLiteral("wikit")};
53 return defaultProviders;
54}
55
56SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const
57{
58 const auto getProviderForKey = [this, &searchTerm](const QString &key) {
59 SearchProvider *provider = nullptr;
60 // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be
61 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test
62 if (!key.isEmpty() && (key.contains(c: QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(protocol: key, updateCacheIfNotfound: false))) {
63 provider = m_registry.findByKey(key);
64 if (provider) {
65 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(str: provider->desktopEntryName())) {
66 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm;
67 } else {
68 provider = nullptr;
69 }
70 }
71 }
72 return provider;
73 };
74
75 SearchProvider *provider = nullptr;
76 if (m_bWebShortcutsEnabled) {
77 QString key;
78 if (typedString.contains(c: QLatin1Char('!'))) {
79 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)"));
80 const auto match = bangRegex.match(subject: typedString);
81 if (match.hasMatch() && match.lastCapturedIndex() == 1) {
82 key = match.captured(nth: 1);
83 searchTerm = QString(typedString).remove(re: bangRegex);
84 }
85 }
86
87 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained
88 // in the query. To avoid not returning any results we check if we can find a provider for the key, if not
89 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660
90 if (!key.isEmpty()) {
91 provider = getProviderForKey(key);
92 if (!provider) {
93 key.clear();
94 }
95 }
96 if (key.isEmpty()) {
97 const int pos = typedString.indexOf(c: QLatin1Char(m_cKeywordDelimiter));
98 if (pos > -1) {
99 key = typedString.left(n: pos).toLower(); // #169801
100 searchTerm = typedString.mid(position: pos + 1);
101 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') {
102 key = typedString;
103 searchTerm = typedString.mid(position: pos + 1);
104 }
105 provider = getProviderForKey(key);
106 }
107
108 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString;
109 }
110
111 return provider;
112}
113
114SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const
115{
116 SearchProvider *provider = nullptr;
117 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut);
118
119 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) {
120 // Make sure we ignore supported protocols, e.g. "smb:", "http:"
121 const int pos = typedString.indexOf(c: QLatin1Char(':'));
122
123 if (pos == -1 || !KProtocolInfo::isKnownProtocol(protocol: typedString.left(n: pos), updateCacheIfNotfound: false)) {
124 provider = m_registry.findByDesktopName(desktopName: defaultSearchProvider);
125 }
126 }
127
128 return provider;
129}
130
131QByteArray KURISearchFilterEngine::name() const
132{
133 return "kuriikwsfilter";
134}
135
136char KURISearchFilterEngine::keywordDelimiter() const
137{
138 return m_cKeywordDelimiter;
139}
140
141QString KURISearchFilterEngine::defaultSearchEngine() const
142{
143 return m_defaultWebShortcut;
144}
145
146QStringList KURISearchFilterEngine::favoriteEngineList() const
147{
148 return m_preferredWebShortcuts;
149}
150
151KURISearchFilterEngine *KURISearchFilterEngine::self()
152{
153 static KURISearchFilterEngine self;
154 return &self;
155}
156
157QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const
158{
159 // Returns the number of query words
160 QString userquery = query;
161
162 // Do some pre-encoding, before we can start the work:
163 {
164 const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\""));
165 // Temporarily substitute spaces in quoted strings (" " -> "%20")
166 // Needed to split user query into StringList correctly.
167 int start = 0;
168 QRegularExpressionMatch match;
169 while ((match = qsexpr.match(subject: userquery, offset: start)).hasMatch()) {
170 QString str = match.captured(nth: 0);
171 str.replace(c: QLatin1Char(' '), after: QLatin1String("%20"));
172 userquery.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: str);
173 start = match.capturedStart(nth: 0) + str.size(); // Move after last quote
174 }
175 }
176
177 // Split user query between spaces:
178 QStringList l = userquery.simplified().split(sep: QLatin1Char(' '), behavior: Qt::SkipEmptyParts);
179
180 // Back-substitute quoted strings (%20 -> " "):
181 userquery.replace(before: QLatin1String("%20"), after: QLatin1String(" "));
182 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" "));
183
184 qCDebug(category) << "Generating substitution map:\n";
185 // Generate substitution map from user query:
186 for (int i = 0; i <= l.count(); i++) {
187 int pos = 0;
188 QString v;
189
190 // Add whole user query (\{0}) to substitution map:
191 if (i == 0) {
192 v = userquery;
193 }
194 // Add partial user query items to substitution map:
195 else {
196 v = l[i - 1];
197 }
198
199 // Insert partial queries (referenced by \1 ... \n) to map:
200 map.insert(key: QString::number(i), value: v);
201
202 // Insert named references (referenced by \name) to map:
203 if ((i > 0) && (pos = v.indexOf(c: QLatin1Char('='))) > 0) {
204 QString s = v.mid(position: pos + 1);
205 QString k = v.left(n: pos);
206
207 // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0')
208 s.replace(before: QLatin1String("%5C"), after: QLatin1String("\\"));
209 map.insert(key: k, value: s);
210 }
211 }
212
213 return l;
214}
215
216static QString encodeString(const QString &s, QStringEncoder &codec)
217{
218 // we encode all characters, including the space character BUG: 304276
219 QByteArray encoded = QByteArray(codec.encode(str: s)).toPercentEncoding();
220 return QString::fromUtf8(ba: encoded);
221}
222
223QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const
224{
225 QString newurl = url;
226 QStringList ql = modifySubstitutionMap(map, query: userquery);
227 const int count = ql.count();
228
229 // Substitute references (\{ref1,ref2,...}) with values from user query:
230 {
231 const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}"));
232 // Substitute reflists (\{ref1,ref2,...}):
233 int start = 0;
234 QRegularExpressionMatch match;
235 while ((match = reflistRe.match(subject: newurl, offset: start)).hasMatch()) {
236 bool found = false;
237
238 // bool rest = false;
239 QString v;
240 const QString rlstring = match.captured(nth: 1);
241
242 // \{@} gets a special treatment later
243 if (rlstring == QLatin1String("@")) {
244 v = QStringLiteral("\\@");
245 found = true;
246 }
247
248 // TODO: strip whitespaces around commas
249 const QStringList refList = rlstring.split(sep: QLatin1Char(','), behavior: Qt::SkipEmptyParts);
250
251 for (const QString &rlitem : refList) {
252 if (found) {
253 break;
254 }
255
256 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)"));
257 const QRegularExpressionMatch rangeMatch = rangeRe.match(subject: rlitem);
258 // Substitute a range of keywords
259 if (rangeMatch.hasMatch()) {
260 int first = rangeMatch.captured(nth: 1).toInt();
261 int last = rangeMatch.captured(nth: 2).toInt();
262
263 if (first == 0) {
264 first = 1;
265 }
266
267 if (last == 0) {
268 last = count;
269 }
270
271 for (int i = first; i <= last; i++) {
272 v += map[QString::number(i)] + QLatin1Char(' ');
273 // Remove used value from ql (needed for \{@}):
274 ql[i - 1].clear();
275 }
276
277 v = v.trimmed();
278 if (!v.isEmpty()) {
279 found = true;
280 }
281
282 v = encodeString(s: v, codec);
283 } else if (rlitem.startsWith(c: QLatin1Char('\"')) && rlitem.endsWith(c: QLatin1Char('\"'))) {
284 // Use default string from query definition:
285 found = true;
286 QString s = rlitem.mid(position: 1, n: rlitem.length() - 2);
287 v = encodeString(s, codec);
288 } else if (map.contains(key: rlitem)) {
289 // Use value from substitution map:
290 found = true;
291 v = encodeString(s: map[rlitem], codec);
292
293 // Remove used value from ql (needed for \{@}):
294 const QChar c = rlitem.at(i: 0); // rlitem can't be empty at this point
295 if (c == QLatin1Char('0')) {
296 // It's a numeric reference to '0'
297 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) {
298 (*it).clear();
299 }
300 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars
301 // It's a numeric reference > '0'
302 int n = rlitem.toInt();
303 ql[n - 1].clear();
304 } else {
305 // It's a alphanumeric reference
306 QStringList::Iterator it = ql.begin();
307 while ((it != ql.end()) && !it->startsWith(s: rlitem + QLatin1Char('='))) {
308 ++it;
309 }
310 if (it != ql.end()) {
311 it->clear();
312 }
313 }
314
315 // Encode '+', otherwise it would be interpreted as space in the resulting url:
316 v.replace(c: QLatin1Char('+'), after: QLatin1String("%2B"));
317 } else if (rlitem == QLatin1String("@")) {
318 v = QStringLiteral("\\@");
319 }
320 }
321
322 newurl.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: v);
323 start = match.capturedStart(nth: 0) + v.size();
324 }
325
326 // Special handling for \{@};
327 {
328 // Generate list of unmatched strings:
329 QString v = ql.join(sep: QLatin1Char(' ')).simplified();
330 v = encodeString(s: v, codec);
331
332 // Substitute \{@} with list of unmatched query strings
333 newurl.replace(before: QLatin1String("\\@"), after: v);
334 }
335 }
336
337 return newurl;
338}
339
340QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
341{
342 SubstMap map;
343 return formatResult(url, cset1, cset2, query, isMalformed, map);
344}
345
346QUrl KURISearchFilterEngine::formatResult(const QString &url,
347 const QString &cset1,
348 const QString &cset2,
349 const QString &userquery,
350 bool /* isMalformed */,
351 SubstMap &map) const
352{
353 // Return nothing if userquery is empty and it contains
354 // substitution strings...
355 if (userquery.isEmpty() && url.indexOf(s: QLatin1String("\\{")) > 0) {
356 return QUrl();
357 }
358
359 // Create a codec for the desired encoding so that we can transcode the user's "url".
360 QString cseta = cset1;
361 if (cseta.isEmpty()) {
362 cseta = QStringLiteral("UTF-8");
363 }
364
365 QStringEncoder csetacodec(cseta.toLatin1().constData());
366 if (!csetacodec.isValid()) {
367 cseta = QStringLiteral("UTF-8");
368 csetacodec = QStringEncoder(QStringEncoder::Utf8);
369 }
370
371 // Add charset indicator for the query to substitution map:
372 map.insert(QStringLiteral("ikw_charset"), value: cseta);
373
374 // Add charset indicator for the fallback query to substitution map:
375 QString csetb = cset2;
376 if (csetb.isEmpty()) {
377 csetb = QStringLiteral("UTF-8");
378 }
379 map.insert(QStringLiteral("wsc_charset"), value: csetb);
380
381 QString newurl = substituteQuery(url, map, userquery, codec&: csetacodec);
382
383 return QUrl(newurl, QUrl::StrictMode);
384}
385
386void KURISearchFilterEngine::configure()
387{
388 qCDebug(category) << "Keywords Engine: Loading config...";
389
390 // Load the config.
391 KConfig config(QString::fromUtf8(ba: name()) + QLatin1String("rc"), KConfig::NoGlobals);
392 KConfigGroup group = config.group(QStringLiteral("General"));
393
394 m_cKeywordDelimiter = group.readEntry(key: "KeywordDelimiter", aDefault: ":").at(i: 0).toLatin1();
395 m_bWebShortcutsEnabled = group.readEntry(key: "EnableWebShortcuts", defaultValue: true);
396 m_defaultWebShortcut = group.readEntry(key: "DefaultWebShortcut", aDefault: "duckduckgo");
397 m_bUseOnlyPreferredWebShortcuts = group.readEntry(key: "UsePreferredWebShortcutsOnly", defaultValue: false);
398
399 QStringList defaultPreferredShortcuts;
400 if (!group.hasKey(key: "PreferredWebShortcuts")) {
401 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders();
402 }
403 m_preferredWebShortcuts = group.readEntry(key: "PreferredWebShortcuts", aDefault: defaultPreferredShortcuts);
404
405 // Use either a white space or a : as the keyword delimiter...
406 if (strchr(s: " :", c: m_cKeywordDelimiter) == nullptr) {
407 m_cKeywordDelimiter = ':';
408 }
409
410 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled;
411 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut;
412 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter;
413 if (m_reloadRegistry) {
414 m_registry.reload();
415 }
416}
417
418SearchProviderRegistry *KURISearchFilterEngine::registry()
419{
420 return &m_registry;
421}
422
423#include "moc_kuriikwsfiltereng_p.cpp"
424

source code of kio/src/urifilters/ikws/kuriikwsfiltereng.cpp