1 | /* |
2 | This file is part of the KDE project |
3 | SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org> |
4 | SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com> |
5 | SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org> |
6 | |
7 | Advanced web shortcuts: |
8 | SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at> |
9 | |
10 | SPDX-License-Identifier: GPL-2.0-or-later |
11 | */ |
12 | |
13 | #include "kuriikwsfiltereng_p.h" |
14 | #include "searchprovider.h" |
15 | |
16 | #include <KConfig> |
17 | #include <KConfigGroup> |
18 | #include <kprotocolinfo.h> |
19 | |
20 | #include <QDBusConnection> |
21 | #include <QLoggingCategory> |
22 | #include <QRegularExpression> |
23 | #include <QStringEncoder> |
24 | |
25 | Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws" , QtWarningMsg) |
26 | using namespace KIO; |
27 | |
28 | /** |
29 | * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator |
30 | * unit tests still pass (they're usually run as part of "make test"). |
31 | */ |
32 | |
33 | KURISearchFilterEngine::KURISearchFilterEngine() |
34 | { |
35 | configure(); |
36 | // Only after initial load, we would want to reparse the files on config changes. |
37 | // When the registry is constructed, it automatically loads the searchproviders |
38 | m_reloadRegistry = true; |
39 | QDBusConnection::sessionBus() |
40 | .connect(service: QString(), QStringLiteral("/" ), QStringLiteral("org.kde.KUriFilterPlugin" ), QStringLiteral("configure" ), receiver: this, SLOT(configure())); |
41 | } |
42 | |
43 | KURISearchFilterEngine::~KURISearchFilterEngine() = default; |
44 | |
45 | // static |
46 | QStringList KURISearchFilterEngine::defaultSearchProviders() |
47 | { |
48 | static const QStringList defaultProviders{QStringLiteral("google" ), |
49 | QStringLiteral("youtube" ), |
50 | QStringLiteral("yahoo" ), |
51 | QStringLiteral("wikipedia" ), |
52 | QStringLiteral("wikit" )}; |
53 | return defaultProviders; |
54 | } |
55 | |
56 | SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const |
57 | { |
58 | const auto getProviderForKey = [this, &searchTerm](const QString &key) { |
59 | SearchProvider *provider = nullptr; |
60 | // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be |
61 | // the case if the delimiter is switched to space, see kiowidgets_space_separator_test |
62 | if (!key.isEmpty() && (key.contains(c: QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(protocol: key, updateCacheIfNotfound: false))) { |
63 | provider = m_registry.findByKey(key); |
64 | if (provider) { |
65 | if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(str: provider->desktopEntryName())) { |
66 | qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm; |
67 | } else { |
68 | provider = nullptr; |
69 | } |
70 | } |
71 | } |
72 | return provider; |
73 | }; |
74 | |
75 | SearchProvider *provider = nullptr; |
76 | if (m_bWebShortcutsEnabled) { |
77 | QString key; |
78 | if (typedString.contains(c: QLatin1Char('!'))) { |
79 | const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)" )); |
80 | const auto match = bangRegex.match(subject: typedString); |
81 | if (match.hasMatch() && match.lastCapturedIndex() == 1) { |
82 | key = match.captured(nth: 1); |
83 | searchTerm = QString(typedString).remove(re: bangRegex); |
84 | } |
85 | } |
86 | |
87 | // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained |
88 | // in the query. To avoid not returning any results we check if we can find a provider for the key, if not |
89 | // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660 |
90 | if (!key.isEmpty()) { |
91 | provider = getProviderForKey(key); |
92 | if (!provider) { |
93 | key.clear(); |
94 | } |
95 | } |
96 | if (key.isEmpty()) { |
97 | const int pos = typedString.indexOf(c: QLatin1Char(m_cKeywordDelimiter)); |
98 | if (pos > -1) { |
99 | key = typedString.left(n: pos).toLower(); // #169801 |
100 | searchTerm = typedString.mid(position: pos + 1); |
101 | } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') { |
102 | key = typedString; |
103 | searchTerm = typedString.mid(position: pos + 1); |
104 | } |
105 | provider = getProviderForKey(key); |
106 | } |
107 | |
108 | qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString; |
109 | } |
110 | |
111 | return provider; |
112 | } |
113 | |
114 | SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const |
115 | { |
116 | SearchProvider *provider = nullptr; |
117 | const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut); |
118 | |
119 | if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) { |
120 | // Make sure we ignore supported protocols, e.g. "smb:", "http:" |
121 | const int pos = typedString.indexOf(c: QLatin1Char(':')); |
122 | |
123 | if (pos == -1 || !KProtocolInfo::isKnownProtocol(protocol: typedString.left(n: pos), updateCacheIfNotfound: false)) { |
124 | provider = m_registry.findByDesktopName(desktopName: defaultSearchProvider); |
125 | } |
126 | } |
127 | |
128 | return provider; |
129 | } |
130 | |
131 | QByteArray KURISearchFilterEngine::name() const |
132 | { |
133 | return "kuriikwsfilter" ; |
134 | } |
135 | |
136 | char KURISearchFilterEngine::keywordDelimiter() const |
137 | { |
138 | return m_cKeywordDelimiter; |
139 | } |
140 | |
141 | QString KURISearchFilterEngine::defaultSearchEngine() const |
142 | { |
143 | return m_defaultWebShortcut; |
144 | } |
145 | |
146 | QStringList KURISearchFilterEngine::favoriteEngineList() const |
147 | { |
148 | return m_preferredWebShortcuts; |
149 | } |
150 | |
151 | KURISearchFilterEngine *KURISearchFilterEngine::self() |
152 | { |
153 | static KURISearchFilterEngine self; |
154 | return &self; |
155 | } |
156 | |
157 | QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const |
158 | { |
159 | // Returns the number of query words |
160 | QString userquery = query; |
161 | |
162 | // Do some pre-encoding, before we can start the work: |
163 | { |
164 | const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\"" )); |
165 | // Temporarily substitute spaces in quoted strings (" " -> "%20") |
166 | // Needed to split user query into StringList correctly. |
167 | int start = 0; |
168 | QRegularExpressionMatch match; |
169 | while ((match = qsexpr.match(subject: userquery, offset: start)).hasMatch()) { |
170 | QString str = match.captured(nth: 0); |
171 | str.replace(c: QLatin1Char(' '), after: QLatin1String("%20" )); |
172 | userquery.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: str); |
173 | start = match.capturedStart(nth: 0) + str.size(); // Move after last quote |
174 | } |
175 | } |
176 | |
177 | // Split user query between spaces: |
178 | QStringList l = userquery.simplified().split(sep: QLatin1Char(' '), behavior: Qt::SkipEmptyParts); |
179 | |
180 | // Back-substitute quoted strings (%20 -> " "): |
181 | userquery.replace(before: QLatin1String("%20" ), after: QLatin1String(" " )); |
182 | l.replaceInStrings(QStringLiteral("%20" ), QStringLiteral(" " )); |
183 | |
184 | qCDebug(category) << "Generating substitution map:\n" ; |
185 | // Generate substitution map from user query: |
186 | for (int i = 0; i <= l.count(); i++) { |
187 | int pos = 0; |
188 | QString v; |
189 | |
190 | // Add whole user query (\{0}) to substitution map: |
191 | if (i == 0) { |
192 | v = userquery; |
193 | } |
194 | // Add partial user query items to substitution map: |
195 | else { |
196 | v = l[i - 1]; |
197 | } |
198 | |
199 | // Insert partial queries (referenced by \1 ... \n) to map: |
200 | map.insert(key: QString::number(i), value: v); |
201 | |
202 | // Insert named references (referenced by \name) to map: |
203 | if ((i > 0) && (pos = v.indexOf(c: QLatin1Char('='))) > 0) { |
204 | QString s = v.mid(position: pos + 1); |
205 | QString k = v.left(n: pos); |
206 | |
207 | // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0') |
208 | s.replace(before: QLatin1String("%5C" ), after: QLatin1String("\\" )); |
209 | map.insert(key: k, value: s); |
210 | } |
211 | } |
212 | |
213 | return l; |
214 | } |
215 | |
216 | static QString encodeString(const QString &s, QStringEncoder &codec) |
217 | { |
218 | // we encode all characters, including the space character BUG: 304276 |
219 | QByteArray encoded = QByteArray(codec.encode(str: s)).toPercentEncoding(); |
220 | return QString::fromUtf8(ba: encoded); |
221 | } |
222 | |
223 | QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const |
224 | { |
225 | QString newurl = url; |
226 | QStringList ql = modifySubstitutionMap(map, query: userquery); |
227 | const int count = ql.count(); |
228 | |
229 | // Substitute references (\{ref1,ref2,...}) with values from user query: |
230 | { |
231 | const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}" )); |
232 | // Substitute reflists (\{ref1,ref2,...}): |
233 | int start = 0; |
234 | QRegularExpressionMatch match; |
235 | while ((match = reflistRe.match(subject: newurl, offset: start)).hasMatch()) { |
236 | bool found = false; |
237 | |
238 | // bool rest = false; |
239 | QString v; |
240 | const QString rlstring = match.captured(nth: 1); |
241 | |
242 | // \{@} gets a special treatment later |
243 | if (rlstring == QLatin1String("@" )) { |
244 | v = QStringLiteral("\\@" ); |
245 | found = true; |
246 | } |
247 | |
248 | // TODO: strip whitespaces around commas |
249 | const QStringList refList = rlstring.split(sep: QLatin1Char(','), behavior: Qt::SkipEmptyParts); |
250 | |
251 | for (const QString &rlitem : refList) { |
252 | if (found) { |
253 | break; |
254 | } |
255 | |
256 | const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)" )); |
257 | const QRegularExpressionMatch rangeMatch = rangeRe.match(subject: rlitem); |
258 | // Substitute a range of keywords |
259 | if (rangeMatch.hasMatch()) { |
260 | int first = rangeMatch.captured(nth: 1).toInt(); |
261 | int last = rangeMatch.captured(nth: 2).toInt(); |
262 | |
263 | if (first == 0) { |
264 | first = 1; |
265 | } |
266 | |
267 | if (last == 0) { |
268 | last = count; |
269 | } |
270 | |
271 | for (int i = first; i <= last; i++) { |
272 | v += map[QString::number(i)] + QLatin1Char(' '); |
273 | // Remove used value from ql (needed for \{@}): |
274 | ql[i - 1].clear(); |
275 | } |
276 | |
277 | v = v.trimmed(); |
278 | if (!v.isEmpty()) { |
279 | found = true; |
280 | } |
281 | |
282 | v = encodeString(s: v, codec); |
283 | } else if (rlitem.startsWith(c: QLatin1Char('\"')) && rlitem.endsWith(c: QLatin1Char('\"'))) { |
284 | // Use default string from query definition: |
285 | found = true; |
286 | QString s = rlitem.mid(position: 1, n: rlitem.length() - 2); |
287 | v = encodeString(s, codec); |
288 | } else if (map.contains(key: rlitem)) { |
289 | // Use value from substitution map: |
290 | found = true; |
291 | v = encodeString(s: map[rlitem], codec); |
292 | |
293 | // Remove used value from ql (needed for \{@}): |
294 | const QChar c = rlitem.at(i: 0); // rlitem can't be empty at this point |
295 | if (c == QLatin1Char('0')) { |
296 | // It's a numeric reference to '0' |
297 | for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) { |
298 | (*it).clear(); |
299 | } |
300 | } else if ((c >= QLatin1String("0" )) && (c <= QLatin1String("9" ))) { // krazy:excludeall=doublequote_chars |
301 | // It's a numeric reference > '0' |
302 | int n = rlitem.toInt(); |
303 | ql[n - 1].clear(); |
304 | } else { |
305 | // It's a alphanumeric reference |
306 | QStringList::Iterator it = ql.begin(); |
307 | while ((it != ql.end()) && !it->startsWith(s: rlitem + QLatin1Char('='))) { |
308 | ++it; |
309 | } |
310 | if (it != ql.end()) { |
311 | it->clear(); |
312 | } |
313 | } |
314 | |
315 | // Encode '+', otherwise it would be interpreted as space in the resulting url: |
316 | v.replace(c: QLatin1Char('+'), after: QLatin1String("%2B" )); |
317 | } else if (rlitem == QLatin1String("@" )) { |
318 | v = QStringLiteral("\\@" ); |
319 | } |
320 | } |
321 | |
322 | newurl.replace(i: match.capturedStart(nth: 0), len: match.capturedLength(nth: 0), after: v); |
323 | start = match.capturedStart(nth: 0) + v.size(); |
324 | } |
325 | |
326 | // Special handling for \{@}; |
327 | { |
328 | // Generate list of unmatched strings: |
329 | QString v = ql.join(sep: QLatin1Char(' ')).simplified(); |
330 | v = encodeString(s: v, codec); |
331 | |
332 | // Substitute \{@} with list of unmatched query strings |
333 | newurl.replace(before: QLatin1String("\\@" ), after: v); |
334 | } |
335 | } |
336 | |
337 | return newurl; |
338 | } |
339 | |
340 | QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const |
341 | { |
342 | SubstMap map; |
343 | return formatResult(url, cset1, cset2, query, isMalformed, map); |
344 | } |
345 | |
346 | QUrl KURISearchFilterEngine::formatResult(const QString &url, |
347 | const QString &cset1, |
348 | const QString &cset2, |
349 | const QString &userquery, |
350 | bool /* isMalformed */, |
351 | SubstMap &map) const |
352 | { |
353 | // Return nothing if userquery is empty and it contains |
354 | // substitution strings... |
355 | if (userquery.isEmpty() && url.indexOf(s: QLatin1String("\\{" )) > 0) { |
356 | return QUrl(); |
357 | } |
358 | |
359 | // Create a codec for the desired encoding so that we can transcode the user's "url". |
360 | QString cseta = cset1; |
361 | if (cseta.isEmpty()) { |
362 | cseta = QStringLiteral("UTF-8" ); |
363 | } |
364 | |
365 | QStringEncoder csetacodec(cseta.toLatin1().constData()); |
366 | if (!csetacodec.isValid()) { |
367 | cseta = QStringLiteral("UTF-8" ); |
368 | csetacodec = QStringEncoder(QStringEncoder::Utf8); |
369 | } |
370 | |
371 | // Add charset indicator for the query to substitution map: |
372 | map.insert(QStringLiteral("ikw_charset" ), value: cseta); |
373 | |
374 | // Add charset indicator for the fallback query to substitution map: |
375 | QString csetb = cset2; |
376 | if (csetb.isEmpty()) { |
377 | csetb = QStringLiteral("UTF-8" ); |
378 | } |
379 | map.insert(QStringLiteral("wsc_charset" ), value: csetb); |
380 | |
381 | QString newurl = substituteQuery(url, map, userquery, codec&: csetacodec); |
382 | |
383 | return QUrl(newurl, QUrl::StrictMode); |
384 | } |
385 | |
386 | void KURISearchFilterEngine::configure() |
387 | { |
388 | qCDebug(category) << "Keywords Engine: Loading config..." ; |
389 | |
390 | // Load the config. |
391 | KConfig config(QString::fromUtf8(ba: name()) + QLatin1String("rc" ), KConfig::NoGlobals); |
392 | KConfigGroup group = config.group(QStringLiteral("General" )); |
393 | |
394 | m_cKeywordDelimiter = group.readEntry(key: "KeywordDelimiter" , aDefault: ":" ).at(i: 0).toLatin1(); |
395 | m_bWebShortcutsEnabled = group.readEntry(key: "EnableWebShortcuts" , defaultValue: true); |
396 | m_defaultWebShortcut = group.readEntry(key: "DefaultWebShortcut" , aDefault: "duckduckgo" ); |
397 | m_bUseOnlyPreferredWebShortcuts = group.readEntry(key: "UsePreferredWebShortcutsOnly" , defaultValue: false); |
398 | |
399 | QStringList defaultPreferredShortcuts; |
400 | if (!group.hasKey(key: "PreferredWebShortcuts" )) { |
401 | defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders(); |
402 | } |
403 | m_preferredWebShortcuts = group.readEntry(key: "PreferredWebShortcuts" , aDefault: defaultPreferredShortcuts); |
404 | |
405 | // Use either a white space or a : as the keyword delimiter... |
406 | if (strchr(s: " :" , c: m_cKeywordDelimiter) == nullptr) { |
407 | m_cKeywordDelimiter = ':'; |
408 | } |
409 | |
410 | qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled; |
411 | qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut; |
412 | qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter; |
413 | if (m_reloadRegistry) { |
414 | m_registry.reload(); |
415 | } |
416 | } |
417 | |
418 | SearchProviderRegistry *KURISearchFilterEngine::registry() |
419 | { |
420 | return &m_registry; |
421 | } |
422 | |
423 | #include "moc_kuriikwsfiltereng_p.cpp" |
424 | |