1 | /* |
2 | This file is part of the KDE project |
3 | SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org> |
4 | SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com> |
5 | SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org> |
6 | |
7 | Advanced web shortcuts: |
8 | SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at> |
9 | |
10 | SPDX-License-Identifier: GPL-2.0-or-later |
11 | */ |
12 | |
13 | #include "kuriikwsfiltereng_p.h" |
14 | #include "searchprovider.h" |
15 | |
16 | #include <KConfig> |
17 | #include <KConfigGroup> |
18 | #include <kprotocolinfo.h> |
19 | |
20 | #ifdef WITH_QTDBUS |
21 | #include <QDBusConnection> |
22 | #endif |
23 | |
24 | #include <QLoggingCategory> |
25 | #include <QRegularExpression> |
26 | #include <QStringEncoder> |
27 | |
28 | Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws" , QtWarningMsg) |
29 | using namespace KIO; |
30 | |
31 | /*! |
32 | * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator |
33 | * unit tests still pass (they're usually run as part of "make test"). |
34 | */ |
35 | |
36 | KURISearchFilterEngine::KURISearchFilterEngine() |
37 | { |
38 | configure(); |
39 | // Only after initial load, we would want to reparse the files on config changes. |
40 | // When the registry is constructed, it automatically loads the searchproviders |
41 | m_reloadRegistry = true; |
42 | |
43 | #ifdef WITH_QTDBUS |
44 | QDBusConnection::sessionBus() |
45 | .connect(service: QString(), QStringLiteral("/" ), QStringLiteral("org.kde.KUriFilterPlugin" ), QStringLiteral("configure" ), receiver: this, SLOT(configure())); |
46 | #endif |
47 | } |
48 | |
49 | KURISearchFilterEngine::~KURISearchFilterEngine() = default; |
50 | |
51 | // static |
52 | QStringList KURISearchFilterEngine::defaultSearchProviders() |
53 | { |
54 | static const QStringList defaultProviders{QStringLiteral("google" ), |
55 | QStringLiteral("youtube" ), |
56 | QStringLiteral("yahoo" ), |
57 | QStringLiteral("wikipedia" ), |
58 | QStringLiteral("wikit" )}; |
59 | return defaultProviders; |
60 | } |
61 | |
62 | SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const |
63 | { |
64 | const auto getProviderForKey = [this, &searchTerm](const QString &key) { |
65 | SearchProvider *provider = nullptr; |
66 | // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be |
67 | // the case if the delimiter is switched to space, see kiowidgets_space_separator_test |
68 | if (!key.isEmpty() && (key.contains(c: QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(protocol: key, updateCacheIfNotfound: false))) { |
69 | provider = m_registry.findByKey(key); |
70 | if (provider) { |
71 | if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(str: provider->desktopEntryName())) { |
72 | qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm; |
73 | } else { |
74 | provider = nullptr; |
75 | } |
76 | } |
77 | } |
78 | return provider; |
79 | }; |
80 | |
81 | SearchProvider *provider = nullptr; |
82 | if (m_bWebShortcutsEnabled) { |
83 | QString key; |
84 | if (typedString.contains(c: QLatin1Char('!'))) { |
85 | const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)" )); |
86 | const auto match = bangRegex.match(subject: typedString); |
87 | if (match.hasMatch() && match.lastCapturedIndex() == 1) { |
88 | key = match.captured(nth: 1); |
89 | searchTerm = QString(typedString).remove(re: bangRegex); |
90 | } |
91 | } |
92 | |
93 | // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained |
94 | // in the query. To avoid not returning any results we check if we can find a provider for the key, if not |
95 | // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660 |
96 | if (!key.isEmpty()) { |
97 | provider = getProviderForKey(key); |
98 | if (!provider) { |
99 | key.clear(); |
100 | } |
101 | } |
102 | if (key.isEmpty()) { |
103 | const int pos = typedString.indexOf(ch: QLatin1Char(m_cKeywordDelimiter)); |
104 | if (pos > -1) { |
105 | key = typedString.left(n: pos).toLower(); // #169801 |
106 | searchTerm = typedString.mid(position: pos + 1); |
107 | } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') { |
108 | key = typedString; |
109 | searchTerm = typedString.mid(position: pos + 1); |
110 | } |
111 | provider = getProviderForKey(key); |
112 | } |
113 | |
114 | qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString; |
115 | } |
116 | |
117 | return provider; |
118 | } |
119 | |
120 | SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const |
121 | { |
122 | SearchProvider *provider = nullptr; |
123 | const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut); |
124 | |
125 | if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) { |
126 | // Make sure we ignore supported protocols, e.g. "smb:", "http:" |
127 | const int pos = typedString.indexOf(ch: QLatin1Char(':')); |
128 | |
129 | if (pos == -1 || !KProtocolInfo::isKnownProtocol(protocol: typedString.left(n: pos), updateCacheIfNotfound: false)) { |
130 | provider = m_registry.findByDesktopName(desktopName: defaultSearchProvider); |
131 | } |
132 | } |
133 | |
134 | return provider; |
135 | } |
136 | |
// Returns the fixed identifier of this engine. configure() appends "rc" to it
// to build the name of the configuration file.
QByteArray KURISearchFilterEngine::name() const
{
    return "kuriikwsfilter";
}
141 | |
// Returns the character that separates a shortcut key from the search terms,
// as set by configure().
char KURISearchFilterEngine::keywordDelimiter() const
{
    return m_cKeywordDelimiter;
}
146 | |
// Returns the default web shortcut read by configure() from the
// "DefaultWebShortcut" entry.
QString KURISearchFilterEngine::defaultSearchEngine() const
{
    return m_defaultWebShortcut;
}
151 | |
// Returns the preferred web shortcuts read by configure() from the
// "PreferredWebShortcuts" entry.
QStringList KURISearchFilterEngine::favoriteEngineList() const
{
    return m_preferredWebShortcuts;
}
156 | |
// Returns a pointer to the single shared engine instance. The instance is a
// function-local static, so it is constructed on the first call.
KURISearchFilterEngine *KURISearchFilterEngine::self()
{
    static KURISearchFilterEngine self;
    return &self;
}
162 | |
163 | QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const |
164 | { |
165 | // Returns the number of query words |
166 | QString userquery = query; |
167 | |
168 | // Do some pre-encoding, before we can start the work: |
169 | { |
170 | const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\"" )); |
171 | // Temporarily substitute spaces in quoted strings (" " -> "%20") |
172 | // Needed to split user query into StringList correctly. |
173 | int start = 0; |
174 | QRegularExpressionMatch match; |
175 | while ((match = qsexpr.match(subject: userquery, offset: start)).hasMatch()) { |
176 | QString str = match.captured(nth: 0); |
177 | str.replace(c: QLatin1Char(' '), after: QLatin1String("%20" )); |
178 | userquery.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: str); |
179 | start = match.capturedStart(nth: 0) + str.size(); // Move after last quote |
180 | } |
181 | } |
182 | |
183 | // Split user query between spaces: |
184 | QStringList l = userquery.simplified().split(sep: QLatin1Char(' '), behavior: Qt::SkipEmptyParts); |
185 | |
186 | // Back-substitute quoted strings (%20 -> " "): |
187 | userquery.replace(before: QLatin1String("%20" ), after: QLatin1String(" " )); |
188 | l.replaceInStrings(QStringLiteral("%20" ), QStringLiteral(" " )); |
189 | |
190 | qCDebug(category) << "Generating substitution map:\n" ; |
191 | // Generate substitution map from user query: |
192 | for (int i = 0; i <= l.count(); i++) { |
193 | int pos = 0; |
194 | QString v; |
195 | |
196 | // Add whole user query (\{0}) to substitution map: |
197 | if (i == 0) { |
198 | v = userquery; |
199 | } |
200 | // Add partial user query items to substitution map: |
201 | else { |
202 | v = l[i - 1]; |
203 | } |
204 | |
205 | // Insert partial queries (referenced by \1 ... \n) to map: |
206 | map.insert(key: QString::number(i), value: v); |
207 | |
208 | // Insert named references (referenced by \name) to map: |
209 | if ((i > 0) && (pos = v.indexOf(ch: QLatin1Char('='))) > 0) { |
210 | QString s = v.mid(position: pos + 1); |
211 | QString k = v.left(n: pos); |
212 | |
213 | // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0') |
214 | s.replace(before: QLatin1String("%5C" ), after: QLatin1String("\\" )); |
215 | map.insert(key: k, value: s); |
216 | } |
217 | } |
218 | |
219 | return l; |
220 | } |
221 | |
222 | static QString encodeString(const QString &s, QStringEncoder &codec) |
223 | { |
224 | // we encode all characters, including the space character BUG: 304276 |
225 | QByteArray encoded = QByteArray(codec.encode(str: s)).toPercentEncoding(); |
226 | return QString::fromUtf8(ba: encoded); |
227 | } |
228 | |
229 | QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const |
230 | { |
231 | QString newurl = url; |
232 | QStringList ql = modifySubstitutionMap(map, query: userquery); |
233 | const int count = ql.count(); |
234 | |
235 | // Substitute references (\{ref1,ref2,...}) with values from user query: |
236 | { |
237 | const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}" )); |
238 | // Substitute reflists (\{ref1,ref2,...}): |
239 | int start = 0; |
240 | QRegularExpressionMatch match; |
241 | while ((match = reflistRe.match(subject: newurl, offset: start)).hasMatch()) { |
242 | bool found = false; |
243 | |
244 | // bool rest = false; |
245 | QString v; |
246 | const QString rlstring = match.captured(nth: 1); |
247 | |
248 | // \{@} gets a special treatment later |
249 | if (rlstring == QLatin1String("@" )) { |
250 | v = QStringLiteral("\\@" ); |
251 | found = true; |
252 | } |
253 | |
254 | // TODO: strip whitespaces around commas |
255 | const QStringList refList = rlstring.split(sep: QLatin1Char(','), behavior: Qt::SkipEmptyParts); |
256 | |
257 | for (const QString &rlitem : refList) { |
258 | if (found) { |
259 | break; |
260 | } |
261 | |
262 | const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)" )); |
263 | const QRegularExpressionMatch rangeMatch = rangeRe.match(subject: rlitem); |
264 | // Substitute a range of keywords |
265 | if (rangeMatch.hasMatch()) { |
266 | int first = rangeMatch.captured(nth: 1).toInt(); |
267 | int last = rangeMatch.captured(nth: 2).toInt(); |
268 | |
269 | if (first == 0) { |
270 | first = 1; |
271 | } |
272 | |
273 | if (last == 0) { |
274 | last = count; |
275 | } |
276 | |
277 | for (int i = first; i <= last; i++) { |
278 | v += map[QString::number(i)] + QLatin1Char(' '); |
279 | // Remove used value from ql (needed for \{@}): |
280 | ql[i - 1].clear(); |
281 | } |
282 | |
283 | v = v.trimmed(); |
284 | if (!v.isEmpty()) { |
285 | found = true; |
286 | } |
287 | |
288 | v = encodeString(s: v, codec); |
289 | } else if (rlitem.startsWith(c: QLatin1Char('\"')) && rlitem.endsWith(c: QLatin1Char('\"'))) { |
290 | // Use default string from query definition: |
291 | found = true; |
292 | QString s = rlitem.mid(position: 1, n: rlitem.length() - 2); |
293 | v = encodeString(s, codec); |
294 | } else if (map.contains(key: rlitem)) { |
295 | // Use value from substitution map: |
296 | found = true; |
297 | v = encodeString(s: map[rlitem], codec); |
298 | |
299 | // Remove used value from ql (needed for \{@}): |
300 | const QChar c = rlitem.at(i: 0); // rlitem can't be empty at this point |
301 | if (c == QLatin1Char('0')) { |
302 | // It's a numeric reference to '0' |
303 | for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) { |
304 | (*it).clear(); |
305 | } |
306 | } else if ((c >= QLatin1String("0" )) && (c <= QLatin1String("9" ))) { // krazy:excludeall=doublequote_chars |
307 | // It's a numeric reference > '0' |
308 | int n = rlitem.toInt(); |
309 | ql[n - 1].clear(); |
310 | } else { |
311 | // It's a alphanumeric reference |
312 | QStringList::Iterator it = ql.begin(); |
313 | while ((it != ql.end()) && !it->startsWith(s: rlitem + QLatin1Char('='))) { |
314 | ++it; |
315 | } |
316 | if (it != ql.end()) { |
317 | it->clear(); |
318 | } |
319 | } |
320 | |
321 | // Encode '+', otherwise it would be interpreted as space in the resulting url: |
322 | v.replace(c: QLatin1Char('+'), after: QLatin1String("%2B" )); |
323 | } else if (rlitem == QLatin1String("@" )) { |
324 | v = QStringLiteral("\\@" ); |
325 | } |
326 | } |
327 | |
328 | newurl.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: v); |
329 | start = match.capturedStart(nth: 0) + v.size(); |
330 | } |
331 | |
332 | // Special handling for \{@}; |
333 | { |
334 | // Generate list of unmatched strings: |
335 | QString v = ql.join(sep: QLatin1Char(' ')).simplified(); |
336 | v = encodeString(s: v, codec); |
337 | |
338 | // Substitute \{@} with list of unmatched query strings |
339 | newurl.replace(before: QLatin1String("\\@" ), after: v); |
340 | } |
341 | } |
342 | |
343 | return newurl; |
344 | } |
345 | |
// Convenience overload: formats the result with a fresh, empty substitution
// map and forwards all other arguments unchanged to the overload that takes a map.
QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
{
    SubstMap map;
    return formatResult(url, cset1, cset2, query, isMalformed, map);
}
351 | |
352 | QUrl KURISearchFilterEngine::formatResult(const QString &url, |
353 | const QString &cset1, |
354 | const QString &cset2, |
355 | const QString &userquery, |
356 | bool /* isMalformed */, |
357 | SubstMap &map) const |
358 | { |
359 | // Return nothing if userquery is empty and it contains |
360 | // substitution strings... |
361 | if (userquery.isEmpty() && url.indexOf(s: QLatin1String("\\{" )) > 0) { |
362 | return QUrl(); |
363 | } |
364 | |
365 | // Create a codec for the desired encoding so that we can transcode the user's "url". |
366 | QString cseta = cset1; |
367 | if (cseta.isEmpty()) { |
368 | cseta = QStringLiteral("UTF-8" ); |
369 | } |
370 | |
371 | QStringEncoder csetacodec(cseta.toLatin1().constData()); |
372 | if (!csetacodec.isValid()) { |
373 | cseta = QStringLiteral("UTF-8" ); |
374 | csetacodec = QStringEncoder(QStringEncoder::Utf8); |
375 | } |
376 | |
377 | // Add charset indicator for the query to substitution map: |
378 | map.insert(QStringLiteral("ikw_charset" ), value: cseta); |
379 | |
380 | // Add charset indicator for the fallback query to substitution map: |
381 | QString csetb = cset2; |
382 | if (csetb.isEmpty()) { |
383 | csetb = QStringLiteral("UTF-8" ); |
384 | } |
385 | map.insert(QStringLiteral("wsc_charset" ), value: csetb); |
386 | |
387 | QString newurl = substituteQuery(url, map, userquery, codec&: csetacodec); |
388 | |
389 | return QUrl(newurl, QUrl::StrictMode); |
390 | } |
391 | |
392 | void KURISearchFilterEngine::configure() |
393 | { |
394 | qCDebug(category) << "Keywords Engine: Loading config..." ; |
395 | |
396 | // Load the config. |
397 | KConfig config(QString::fromUtf8(ba: name()) + QLatin1String("rc" ), KConfig::NoGlobals); |
398 | KConfigGroup group = config.group(QStringLiteral("General" )); |
399 | |
400 | m_cKeywordDelimiter = group.readEntry(key: "KeywordDelimiter" , aDefault: ":" ).at(i: 0).toLatin1(); |
401 | m_bWebShortcutsEnabled = group.readEntry(key: "EnableWebShortcuts" , defaultValue: true); |
402 | m_defaultWebShortcut = group.readEntry(key: "DefaultWebShortcut" , aDefault: "duckduckgo" ); |
403 | m_bUseOnlyPreferredWebShortcuts = group.readEntry(key: "UsePreferredWebShortcutsOnly" , defaultValue: false); |
404 | |
405 | QStringList defaultPreferredShortcuts; |
406 | if (!group.hasKey(key: "PreferredWebShortcuts" )) { |
407 | defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders(); |
408 | } |
409 | m_preferredWebShortcuts = group.readEntry(key: "PreferredWebShortcuts" , aDefault: defaultPreferredShortcuts); |
410 | |
411 | // Use either a white space or a : as the keyword delimiter... |
412 | if (strchr(s: " :" , c: m_cKeywordDelimiter) == nullptr) { |
413 | m_cKeywordDelimiter = ':'; |
414 | } |
415 | |
416 | qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled; |
417 | qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut; |
418 | qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter; |
419 | if (m_reloadRegistry) { |
420 | m_registry.reload(); |
421 | } |
422 | } |
423 | |
// Returns a pointer to the engine's search provider registry member; the
// engine keeps ownership.
SearchProviderRegistry *KURISearchFilterEngine::registry()
{
    return &m_registry;
}
428 | |
429 | #include "moc_kuriikwsfiltereng_p.cpp" |
430 | |