1/*
2 This file is part of the KDE project
3 SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org>
4 SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com>
5 SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org>
6
7 Advanced web shortcuts:
8 SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at>
9
10 SPDX-License-Identifier: GPL-2.0-or-later
11*/
12
13#include "kuriikwsfiltereng_p.h"
14#include "searchprovider.h"
15
16#include <KConfig>
17#include <KConfigGroup>
18#include <kprotocolinfo.h>
19
20#ifdef WITH_QTDBUS
21#include <QDBusConnection>
22#endif
23
24#include <QLoggingCategory>
25#include <QRegularExpression>
26#include <QStringEncoder>
27
28Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg)
29using namespace KIO;
30
31/*!
32 * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator
33 * unit tests still pass (they're usually run as part of "make test").
34 */
35
36KURISearchFilterEngine::KURISearchFilterEngine()
37{
38 configure();
39 // Only after initial load, we would want to reparse the files on config changes.
40 // When the registry is constructed, it automatically loads the searchproviders
41 m_reloadRegistry = true;
42
43#ifdef WITH_QTDBUS
44 QDBusConnection::sessionBus()
45 .connect(service: QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), receiver: this, SLOT(configure()));
46#endif
47}
48
49KURISearchFilterEngine::~KURISearchFilterEngine() = default;
50
51// static
52QStringList KURISearchFilterEngine::defaultSearchProviders()
53{
54 static const QStringList defaultProviders{QStringLiteral("google"),
55 QStringLiteral("youtube"),
56 QStringLiteral("yahoo"),
57 QStringLiteral("wikipedia"),
58 QStringLiteral("wikit")};
59 return defaultProviders;
60}
61
62SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const
63{
64 const auto getProviderForKey = [this, &searchTerm](const QString &key) {
65 SearchProvider *provider = nullptr;
66 // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be
67 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test
68 if (!key.isEmpty() && (key.contains(c: QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(protocol: key, updateCacheIfNotfound: false))) {
69 provider = m_registry.findByKey(key);
70 if (provider) {
71 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(str: provider->desktopEntryName())) {
72 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm;
73 } else {
74 provider = nullptr;
75 }
76 }
77 }
78 return provider;
79 };
80
81 SearchProvider *provider = nullptr;
82 if (m_bWebShortcutsEnabled) {
83 QString key;
84 if (typedString.contains(c: QLatin1Char('!'))) {
85 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)"));
86 const auto match = bangRegex.match(subject: typedString);
87 if (match.hasMatch() && match.lastCapturedIndex() == 1) {
88 key = match.captured(nth: 1);
89 searchTerm = QString(typedString).remove(re: bangRegex);
90 }
91 }
92
93 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained
94 // in the query. To avoid not returning any results we check if we can find a provider for the key, if not
95 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660
96 if (!key.isEmpty()) {
97 provider = getProviderForKey(key);
98 if (!provider) {
99 key.clear();
100 }
101 }
102 if (key.isEmpty()) {
103 const int pos = typedString.indexOf(ch: QLatin1Char(m_cKeywordDelimiter));
104 if (pos > -1) {
105 key = typedString.left(n: pos).toLower(); // #169801
106 searchTerm = typedString.mid(position: pos + 1);
107 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') {
108 key = typedString;
109 searchTerm = typedString.mid(position: pos + 1);
110 }
111 provider = getProviderForKey(key);
112 }
113
114 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString;
115 }
116
117 return provider;
118}
119
120SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const
121{
122 SearchProvider *provider = nullptr;
123 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut);
124
125 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) {
126 // Make sure we ignore supported protocols, e.g. "smb:", "http:"
127 const int pos = typedString.indexOf(ch: QLatin1Char(':'));
128
129 if (pos == -1 || !KProtocolInfo::isKnownProtocol(protocol: typedString.left(n: pos), updateCacheIfNotfound: false)) {
130 provider = m_registry.findByDesktopName(desktopName: defaultSearchProvider);
131 }
132 }
133
134 return provider;
135}
136
137QByteArray KURISearchFilterEngine::name() const
138{
139 return "kuriikwsfilter";
140}
141
142char KURISearchFilterEngine::keywordDelimiter() const
143{
144 return m_cKeywordDelimiter;
145}
146
147QString KURISearchFilterEngine::defaultSearchEngine() const
148{
149 return m_defaultWebShortcut;
150}
151
152QStringList KURISearchFilterEngine::favoriteEngineList() const
153{
154 return m_preferredWebShortcuts;
155}
156
157KURISearchFilterEngine *KURISearchFilterEngine::self()
158{
159 static KURISearchFilterEngine self;
160 return &self;
161}
162
163QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const
164{
165 // Returns the number of query words
166 QString userquery = query;
167
168 // Do some pre-encoding, before we can start the work:
169 {
170 const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\""));
171 // Temporarily substitute spaces in quoted strings (" " -> "%20")
172 // Needed to split user query into StringList correctly.
173 int start = 0;
174 QRegularExpressionMatch match;
175 while ((match = qsexpr.match(subject: userquery, offset: start)).hasMatch()) {
176 QString str = match.captured(nth: 0);
177 str.replace(c: QLatin1Char(' '), after: QLatin1String("%20"));
178 userquery.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: str);
179 start = match.capturedStart(nth: 0) + str.size(); // Move after last quote
180 }
181 }
182
183 // Split user query between spaces:
184 QStringList l = userquery.simplified().split(sep: QLatin1Char(' '), behavior: Qt::SkipEmptyParts);
185
186 // Back-substitute quoted strings (%20 -> " "):
187 userquery.replace(before: QLatin1String("%20"), after: QLatin1String(" "));
188 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" "));
189
190 qCDebug(category) << "Generating substitution map:\n";
191 // Generate substitution map from user query:
192 for (int i = 0; i <= l.count(); i++) {
193 int pos = 0;
194 QString v;
195
196 // Add whole user query (\{0}) to substitution map:
197 if (i == 0) {
198 v = userquery;
199 }
200 // Add partial user query items to substitution map:
201 else {
202 v = l[i - 1];
203 }
204
205 // Insert partial queries (referenced by \1 ... \n) to map:
206 map.insert(key: QString::number(i), value: v);
207
208 // Insert named references (referenced by \name) to map:
209 if ((i > 0) && (pos = v.indexOf(ch: QLatin1Char('='))) > 0) {
210 QString s = v.mid(position: pos + 1);
211 QString k = v.left(n: pos);
212
213 // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0')
214 s.replace(before: QLatin1String("%5C"), after: QLatin1String("\\"));
215 map.insert(key: k, value: s);
216 }
217 }
218
219 return l;
220}
221
222static QString encodeString(const QString &s, QStringEncoder &codec)
223{
224 // we encode all characters, including the space character BUG: 304276
225 QByteArray encoded = QByteArray(codec.encode(str: s)).toPercentEncoding();
226 return QString::fromUtf8(ba: encoded);
227}
228
229QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const
230{
231 QString newurl = url;
232 QStringList ql = modifySubstitutionMap(map, query: userquery);
233 const int count = ql.count();
234
235 // Substitute references (\{ref1,ref2,...}) with values from user query:
236 {
237 const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}"));
238 // Substitute reflists (\{ref1,ref2,...}):
239 int start = 0;
240 QRegularExpressionMatch match;
241 while ((match = reflistRe.match(subject: newurl, offset: start)).hasMatch()) {
242 bool found = false;
243
244 // bool rest = false;
245 QString v;
246 const QString rlstring = match.captured(nth: 1);
247
248 // \{@} gets a special treatment later
249 if (rlstring == QLatin1String("@")) {
250 v = QStringLiteral("\\@");
251 found = true;
252 }
253
254 // TODO: strip whitespaces around commas
255 const QStringList refList = rlstring.split(sep: QLatin1Char(','), behavior: Qt::SkipEmptyParts);
256
257 for (const QString &rlitem : refList) {
258 if (found) {
259 break;
260 }
261
262 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)"));
263 const QRegularExpressionMatch rangeMatch = rangeRe.match(subject: rlitem);
264 // Substitute a range of keywords
265 if (rangeMatch.hasMatch()) {
266 int first = rangeMatch.captured(nth: 1).toInt();
267 int last = rangeMatch.captured(nth: 2).toInt();
268
269 if (first == 0) {
270 first = 1;
271 }
272
273 if (last == 0) {
274 last = count;
275 }
276
277 for (int i = first; i <= last; i++) {
278 v += map[QString::number(i)] + QLatin1Char(' ');
279 // Remove used value from ql (needed for \{@}):
280 ql[i - 1].clear();
281 }
282
283 v = v.trimmed();
284 if (!v.isEmpty()) {
285 found = true;
286 }
287
288 v = encodeString(s: v, codec);
289 } else if (rlitem.startsWith(c: QLatin1Char('\"')) && rlitem.endsWith(c: QLatin1Char('\"'))) {
290 // Use default string from query definition:
291 found = true;
292 QString s = rlitem.mid(position: 1, n: rlitem.length() - 2);
293 v = encodeString(s, codec);
294 } else if (map.contains(key: rlitem)) {
295 // Use value from substitution map:
296 found = true;
297 v = encodeString(s: map[rlitem], codec);
298
299 // Remove used value from ql (needed for \{@}):
300 const QChar c = rlitem.at(i: 0); // rlitem can't be empty at this point
301 if (c == QLatin1Char('0')) {
302 // It's a numeric reference to '0'
303 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) {
304 (*it).clear();
305 }
306 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars
307 // It's a numeric reference > '0'
308 int n = rlitem.toInt();
309 ql[n - 1].clear();
310 } else {
311 // It's a alphanumeric reference
312 QStringList::Iterator it = ql.begin();
313 while ((it != ql.end()) && !it->startsWith(s: rlitem + QLatin1Char('='))) {
314 ++it;
315 }
316 if (it != ql.end()) {
317 it->clear();
318 }
319 }
320
321 // Encode '+', otherwise it would be interpreted as space in the resulting url:
322 v.replace(c: QLatin1Char('+'), after: QLatin1String("%2B"));
323 } else if (rlitem == QLatin1String("@")) {
324 v = QStringLiteral("\\@");
325 }
326 }
327
328 newurl.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: v);
329 start = match.capturedStart(nth: 0) + v.size();
330 }
331
332 // Special handling for \{@};
333 {
334 // Generate list of unmatched strings:
335 QString v = ql.join(sep: QLatin1Char(' ')).simplified();
336 v = encodeString(s: v, codec);
337
338 // Substitute \{@} with list of unmatched query strings
339 newurl.replace(before: QLatin1String("\\@"), after: v);
340 }
341 }
342
343 return newurl;
344}
345
346QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
347{
348 SubstMap map;
349 return formatResult(url, cset1, cset2, query, isMalformed, map);
350}
351
352QUrl KURISearchFilterEngine::formatResult(const QString &url,
353 const QString &cset1,
354 const QString &cset2,
355 const QString &userquery,
356 bool /* isMalformed */,
357 SubstMap &map) const
358{
359 // Return nothing if userquery is empty and it contains
360 // substitution strings...
361 if (userquery.isEmpty() && url.indexOf(s: QLatin1String("\\{")) > 0) {
362 return QUrl();
363 }
364
365 // Create a codec for the desired encoding so that we can transcode the user's "url".
366 QString cseta = cset1;
367 if (cseta.isEmpty()) {
368 cseta = QStringLiteral("UTF-8");
369 }
370
371 QStringEncoder csetacodec(cseta.toLatin1().constData());
372 if (!csetacodec.isValid()) {
373 cseta = QStringLiteral("UTF-8");
374 csetacodec = QStringEncoder(QStringEncoder::Utf8);
375 }
376
377 // Add charset indicator for the query to substitution map:
378 map.insert(QStringLiteral("ikw_charset"), value: cseta);
379
380 // Add charset indicator for the fallback query to substitution map:
381 QString csetb = cset2;
382 if (csetb.isEmpty()) {
383 csetb = QStringLiteral("UTF-8");
384 }
385 map.insert(QStringLiteral("wsc_charset"), value: csetb);
386
387 QString newurl = substituteQuery(url, map, userquery, codec&: csetacodec);
388
389 return QUrl(newurl, QUrl::StrictMode);
390}
391
392void KURISearchFilterEngine::configure()
393{
394 qCDebug(category) << "Keywords Engine: Loading config...";
395
396 // Load the config.
397 KConfig config(QString::fromUtf8(ba: name()) + QLatin1String("rc"), KConfig::NoGlobals);
398 KConfigGroup group = config.group(QStringLiteral("General"));
399
400 m_cKeywordDelimiter = group.readEntry(key: "KeywordDelimiter", aDefault: ":").at(i: 0).toLatin1();
401 m_bWebShortcutsEnabled = group.readEntry(key: "EnableWebShortcuts", defaultValue: true);
402 m_defaultWebShortcut = group.readEntry(key: "DefaultWebShortcut", aDefault: "duckduckgo");
403 m_bUseOnlyPreferredWebShortcuts = group.readEntry(key: "UsePreferredWebShortcutsOnly", defaultValue: false);
404
405 QStringList defaultPreferredShortcuts;
406 if (!group.hasKey(key: "PreferredWebShortcuts")) {
407 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders();
408 }
409 m_preferredWebShortcuts = group.readEntry(key: "PreferredWebShortcuts", aDefault: defaultPreferredShortcuts);
410
411 // Use either a white space or a : as the keyword delimiter...
412 if (strchr(s: " :", c: m_cKeywordDelimiter) == nullptr) {
413 m_cKeywordDelimiter = ':';
414 }
415
416 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled;
417 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut;
418 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter;
419 if (m_reloadRegistry) {
420 m_registry.reload();
421 }
422}
423
424SearchProviderRegistry *KURISearchFilterEngine::registry()
425{
426 return &m_registry;
427}
428
429#include "moc_kuriikwsfiltereng_p.cpp"
430

source code of kio/src/urifilters/ikws/kuriikwsfiltereng.cpp