1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the Qt Assistant of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "qhelpsearchindexwriter_default_p.h" |
41 | #include "qhelp_global.h" |
42 | #include "qhelpenginecore.h" |
43 | #include "qhelpdbreader_p.h" |
44 | |
45 | #include <QtCore/QDataStream> |
46 | #include <QtCore/QDateTime> |
47 | #include <QtCore/QDir> |
48 | #include <QtCore/QTextCodec> |
49 | #include <QtCore/QTextStream> |
50 | #include <QtCore/QSet> |
51 | #include <QtCore/QUrl> |
52 | #include <QtCore/QVariant> |
53 | #include <QtSql/QSqlDatabase> |
54 | #include <QtSql/QSqlDriver> |
55 | #include <QtSql/QSqlError> |
56 | #include <QtSql/QSqlQuery> |
57 | |
58 | #include <QTextDocument> |
59 | |
60 | QT_BEGIN_NAMESPACE |
61 | |
62 | namespace fulltextsearch { |
63 | namespace qt { |
64 | |
// File name of the SQLite full-text-search database inside the index directory.
const char FTS_DB_NAME[] = "fts";
66 | |
67 | Writer::Writer(const QString &path) |
68 | : m_dbDir(path) |
69 | { |
70 | clearLegacyIndex(); |
71 | QDir().mkpath(dirPath: m_dbDir); |
72 | m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: QLatin1String("QHelpWriter" ), pointer: this); |
73 | m_db = new QSqlDatabase(); |
74 | *m_db = QSqlDatabase::addDatabase(type: QLatin1String("QSQLITE" ), connectionName: m_uniqueId); |
75 | const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME); |
76 | m_db->setDatabaseName(dbPath); |
77 | if (!m_db->open()) { |
78 | const QString &error = QHelpSearchIndexWriter::tr(s: "Cannot open database \"%1\" using connection \"%2\": %3" ) |
79 | .arg(args: dbPath, args&: m_uniqueId, args: m_db->lastError().text()); |
80 | qWarning(msg: "%s" , qUtf8Printable(error)); |
81 | delete m_db; |
82 | m_db = nullptr; |
83 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
84 | m_uniqueId = QString(); |
85 | } else { |
86 | startTransaction(); |
87 | } |
88 | } |
89 | |
90 | bool Writer::tryInit(bool reindex) |
91 | { |
92 | if (!m_db) |
93 | return true; |
94 | |
95 | QSqlQuery query(*m_db); |
96 | // HACK: we try to perform any modifying command just to check if |
97 | // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver) |
98 | if (!query.exec(query: QLatin1String("CREATE TABLE foo ();" ))) { |
99 | if (query.lastError().nativeErrorCode() == QLatin1String("5" )) // db is locked |
100 | return false; |
101 | } |
102 | // HACK: clear what we have created |
103 | query.exec(query: QLatin1String("DROP TABLE foo;" )); |
104 | |
105 | init(reindex); |
106 | return true; |
107 | } |
108 | |
109 | bool Writer::hasDB() |
110 | { |
111 | if (!m_db) |
112 | return false; |
113 | |
114 | QSqlQuery query(*m_db); |
115 | |
116 | query.prepare(query: QLatin1String("SELECT id FROM info LIMIT 1" )); |
117 | query.exec(); |
118 | |
119 | return query.next(); |
120 | } |
121 | |
122 | void Writer::clearLegacyIndex() |
123 | { |
124 | // Clear old legacy clucene index. |
125 | // More important in case of Creator, since |
126 | // the index folder is common for all Creator versions |
127 | QDir dir(m_dbDir); |
128 | if (!dir.exists()) |
129 | return; |
130 | |
131 | const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden); |
132 | if (!list.contains(str: QLatin1String(FTS_DB_NAME))) { |
133 | for (const QString &item : list) |
134 | dir.remove(fileName: item); |
135 | } |
136 | } |
137 | |
138 | void Writer::init(bool reindex) |
139 | { |
140 | if (!m_db) |
141 | return; |
142 | |
143 | QSqlQuery query(*m_db); |
144 | |
145 | if (reindex && hasDB()) { |
146 | m_needOptimize = true; |
147 | |
148 | query.exec(query: QLatin1String("DROP TABLE titles;" )); |
149 | query.exec(query: QLatin1String("DROP TABLE contents;" )); |
150 | query.exec(query: QLatin1String("DROP TABLE info;" )); |
151 | } |
152 | |
153 | query.exec(query: QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);" )); |
154 | |
155 | query.exec(query: QLatin1String("CREATE VIRTUAL TABLE titles USING fts5(" |
156 | "namespace UNINDEXED, attributes UNINDEXED, " |
157 | "url UNINDEXED, title, " |
158 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');" )); |
159 | query.exec(query: QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN " |
160 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
161 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
162 | "END;" )); |
163 | query.exec(query: QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN " |
164 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
165 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
166 | "END;" )); |
167 | query.exec(query: QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN " |
168 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
169 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
170 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
171 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
172 | "END;" )); |
173 | |
174 | query.exec(query: QLatin1String("CREATE VIRTUAL TABLE contents USING fts5(" |
175 | "namespace UNINDEXED, attributes UNINDEXED, " |
176 | "url UNINDEXED, title, data, " |
177 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');" )); |
178 | query.exec(query: QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN " |
179 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
180 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
181 | "END;" )); |
182 | query.exec(query: QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN " |
183 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
184 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
185 | "END;" )); |
186 | query.exec(query: QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN " |
187 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
188 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
189 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
190 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
191 | "END;" )); |
192 | } |
193 | |
194 | Writer::~Writer() |
195 | { |
196 | if (m_db) { |
197 | m_db->close(); |
198 | delete m_db; |
199 | } |
200 | |
201 | if (!m_uniqueId.isEmpty()) |
202 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
203 | } |
204 | |
205 | void Writer::flush() |
206 | { |
207 | if (!m_db) |
208 | return; |
209 | |
210 | QSqlQuery query(*m_db); |
211 | |
212 | query.prepare(query: QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)" )); |
213 | query.addBindValue(val: m_namespaces); |
214 | query.addBindValue(val: m_attributes); |
215 | query.addBindValue(val: m_urls); |
216 | query.addBindValue(val: m_titles); |
217 | query.addBindValue(val: m_contents); |
218 | query.execBatch(); |
219 | |
220 | m_namespaces = QVariantList(); |
221 | m_attributes = QVariantList(); |
222 | m_urls = QVariantList(); |
223 | m_titles = QVariantList(); |
224 | m_contents = QVariantList(); |
225 | } |
226 | |
227 | void Writer::removeNamespace(const QString &namespaceName) |
228 | { |
229 | if (!m_db) |
230 | return; |
231 | |
232 | if (!hasNamespace(namespaceName)) |
233 | return; // no data to delete |
234 | |
235 | m_needOptimize = true; |
236 | |
237 | QSqlQuery query(*m_db); |
238 | |
239 | query.prepare(query: QLatin1String("DELETE FROM info WHERE namespace = ?" )); |
240 | query.addBindValue(val: namespaceName); |
241 | query.exec(); |
242 | } |
243 | |
244 | bool Writer::hasNamespace(const QString &namespaceName) |
245 | { |
246 | if (!m_db) |
247 | return false; |
248 | |
249 | QSqlQuery query(*m_db); |
250 | |
251 | query.prepare(query: QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1" )); |
252 | query.addBindValue(val: namespaceName); |
253 | query.exec(); |
254 | |
255 | return query.next(); |
256 | } |
257 | |
258 | void Writer::insertDoc(const QString &namespaceName, |
259 | const QString &attributes, |
260 | const QString &url, |
261 | const QString &title, |
262 | const QString &contents) |
263 | { |
264 | m_namespaces.append(t: namespaceName); |
265 | m_attributes.append(t: attributes); |
266 | m_urls.append(t: url); |
267 | m_titles.append(t: title); |
268 | m_contents.append(t: contents); |
269 | } |
270 | |
271 | void Writer::startTransaction() |
272 | { |
273 | if (!m_db) |
274 | return; |
275 | |
276 | m_needOptimize = false; |
277 | if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions)) |
278 | m_db->transaction(); |
279 | } |
280 | |
281 | void Writer::endTransaction() |
282 | { |
283 | if (!m_db) |
284 | return; |
285 | |
286 | QSqlQuery query(*m_db); |
287 | |
288 | if (m_needOptimize) { |
289 | query.exec(query: QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')" )); |
290 | query.exec(query: QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')" )); |
291 | } |
292 | |
293 | if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions)) |
294 | m_db->commit(); |
295 | |
296 | if (m_needOptimize) |
297 | query.exec(query: QLatin1String("VACUUM" )); |
298 | } |
299 | |
300 | QHelpSearchIndexWriter::QHelpSearchIndexWriter() |
301 | : QThread() |
302 | , m_cancel(false) |
303 | { |
304 | } |
305 | |
306 | QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
307 | { |
308 | m_mutex.lock(); |
309 | this->m_cancel = true; |
310 | m_mutex.unlock(); |
311 | |
312 | wait(); |
313 | } |
314 | |
315 | void QHelpSearchIndexWriter::cancelIndexing() |
316 | { |
317 | QMutexLocker lock(&m_mutex); |
318 | m_cancel = true; |
319 | } |
320 | |
321 | void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
322 | const QString &indexFilesFolder, |
323 | bool reindex) |
324 | { |
325 | wait(); |
326 | QMutexLocker lock(&m_mutex); |
327 | |
328 | m_cancel = false; |
329 | m_reindex = reindex; |
330 | m_collectionFile = collectionFile; |
331 | m_indexFilesFolder = indexFilesFolder; |
332 | |
333 | lock.unlock(); |
334 | |
335 | start(QThread::LowestPriority); |
336 | } |
337 | |
// Custom-value key in the help collection under which the map of indexed
// namespaces is stored.
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
339 | |
340 | static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine) |
341 | { |
342 | QMap<QString, QDateTime> indexMap; |
343 | QDataStream dataStream(engine.customValue( |
344 | key: QLatin1String(IndexedNamespacesKey)).toByteArray()); |
345 | dataStream >> indexMap; |
346 | return indexMap; |
347 | } |
348 | |
349 | static bool writeIndexMap(QHelpEngineCore *engine, |
350 | const QMap<QString, QDateTime> &indexMap) |
351 | { |
352 | QByteArray data; |
353 | |
354 | QDataStream dataStream(&data, QIODevice::ReadWrite); |
355 | dataStream << indexMap; |
356 | |
357 | return engine->setCustomValue( |
358 | key: QLatin1String(IndexedNamespacesKey), value: data); |
359 | } |
360 | |
361 | static bool clearIndexMap(QHelpEngineCore *engine) |
362 | { |
363 | return engine->removeCustomValue(key: QLatin1String(IndexedNamespacesKey)); |
364 | } |
365 | |
366 | void QHelpSearchIndexWriter::run() |
367 | { |
368 | QMutexLocker lock(&m_mutex); |
369 | |
370 | if (m_cancel) |
371 | return; |
372 | |
373 | const bool reindex(m_reindex); |
374 | const QString collectionFile(m_collectionFile); |
375 | const QString indexPath(m_indexFilesFolder); |
376 | |
377 | lock.unlock(); |
378 | |
379 | QHelpEngineCore engine(collectionFile, nullptr); |
380 | if (!engine.setupData()) |
381 | return; |
382 | |
383 | if (reindex) |
384 | clearIndexMap(engine: &engine); |
385 | |
386 | emit indexingStarted(); |
387 | |
388 | Writer writer(indexPath); |
389 | |
390 | while (!writer.tryInit(reindex)) |
391 | sleep(1); |
392 | |
393 | const QStringList ®isteredDocs = engine.registeredDocumentations(); |
394 | QMap<QString, QDateTime> indexMap = readIndexMap(engine); |
395 | |
396 | if (!reindex) { |
397 | for (const QString &namespaceName : registeredDocs) { |
398 | if (indexMap.contains(akey: namespaceName)) { |
399 | const QString path = engine.documentationFileName(namespaceName); |
400 | if (indexMap.value(akey: namespaceName) < QFileInfo(path).lastModified()) { |
401 | // Remove some outdated indexed stuff |
402 | indexMap.remove(akey: namespaceName); |
403 | writer.removeNamespace(namespaceName); |
404 | } else if (!writer.hasNamespace(namespaceName)) { |
405 | // No data in fts db for namespace. |
406 | // The namespace could have been removed from fts db |
407 | // or the whole fts db have been removed |
408 | // without removing it from indexMap. |
409 | indexMap.remove(akey: namespaceName); |
410 | } |
411 | } else { |
412 | // Needed in case namespaceName was removed from indexMap |
413 | // without removing it from fts db. |
414 | // May happen when e.g. qch file was removed manually |
415 | // without removing fts db. |
416 | writer.removeNamespace(namespaceName); |
417 | } |
418 | // TODO: we may also detect if there are any other data |
419 | // and remove it |
420 | } |
421 | } else { |
422 | indexMap.clear(); |
423 | } |
424 | |
425 | for (const QString &namespaceName : indexMap.keys()) { |
426 | if (!registeredDocs.contains(str: namespaceName)) { |
427 | indexMap.remove(akey: namespaceName); |
428 | writer.removeNamespace(namespaceName); |
429 | } |
430 | } |
431 | |
432 | for (const QString &namespaceName : registeredDocs) { |
433 | lock.relock(); |
434 | if (m_cancel) { |
435 | // store what we have done so far |
436 | writeIndexMap(engine: &engine, indexMap); |
437 | writer.endTransaction(); |
438 | emit indexingFinished(); |
439 | return; |
440 | } |
441 | lock.unlock(); |
442 | |
443 | // if indexed, continue |
444 | if (indexMap.contains(akey: namespaceName)) |
445 | continue; |
446 | |
447 | const QString fileName = engine.documentationFileName(namespaceName); |
448 | QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName( |
449 | name: fileName, pointer: this), nullptr); |
450 | if (!reader.init()) |
451 | continue; |
452 | |
453 | const QString virtualFolder = reader.virtualFolder(); |
454 | |
455 | const QList<QStringList> &attributeSets = |
456 | engine.filterAttributeSets(namespaceName); |
457 | |
458 | for (const QStringList &attributes : attributeSets) { |
459 | const QString &attributesString = attributes.join(sep: QLatin1Char('|')); |
460 | |
461 | const QMap<QString, QByteArray> htmlFiles |
462 | = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("html" )); |
463 | const QMap<QString, QByteArray> htmFiles |
464 | = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("htm" )); |
465 | const QMap<QString, QByteArray> txtFiles |
466 | = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("txt" )); |
467 | |
468 | QMultiMap<QString, QByteArray> files = htmlFiles; |
469 | files.unite(other: htmFiles); |
470 | files.unite(other: txtFiles); |
471 | |
472 | for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) { |
473 | lock.relock(); |
474 | if (m_cancel) { |
475 | // store what we have done so far |
476 | writeIndexMap(engine: &engine, indexMap); |
477 | writer.endTransaction(); |
478 | emit indexingFinished(); |
479 | return; |
480 | } |
481 | lock.unlock(); |
482 | |
483 | const QString &file = it.key(); |
484 | const QByteArray &data = it.value(); |
485 | |
486 | if (data.isEmpty()) |
487 | continue; |
488 | |
489 | QUrl url; |
490 | url.setScheme(QLatin1String("qthelp" )); |
491 | url.setAuthority(authority: namespaceName); |
492 | url.setPath(path: QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file); |
493 | |
494 | if (url.hasFragment()) |
495 | url.setFragment(fragment: QString()); |
496 | |
497 | const QString &fullFileName = url.toString(); |
498 | if (!fullFileName.endsWith(s: QLatin1String(".html" )) |
499 | && !fullFileName.endsWith(s: QLatin1String(".htm" )) |
500 | && !fullFileName.endsWith(s: QLatin1String(".txt" ))) { |
501 | continue; |
502 | } |
503 | |
504 | QTextStream s(data); |
505 | const QString &en = QHelpGlobal::codecFromData(data); |
506 | s.setCodec(QTextCodec::codecForName(name: en.toLatin1().constData())); |
507 | |
508 | const QString &text = s.readAll(); |
509 | if (text.isEmpty()) |
510 | continue; |
511 | |
512 | QString title; |
513 | QString contents; |
514 | if (fullFileName.endsWith(s: QLatin1String(".txt" ))) { |
515 | title = fullFileName.mid(position: fullFileName.lastIndexOf(c: QLatin1Char('/')) + 1); |
516 | contents = text.toHtmlEscaped(); |
517 | } else { |
518 | QTextDocument doc; |
519 | doc.setHtml(text); |
520 | |
521 | title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped(); |
522 | contents = doc.toPlainText().toHtmlEscaped(); |
523 | } |
524 | |
525 | writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents); |
526 | } |
527 | } |
528 | writer.flush(); |
529 | const QString &path = engine.documentationFileName(namespaceName); |
530 | indexMap.insert(akey: namespaceName, avalue: QFileInfo(path).lastModified()); |
531 | } |
532 | |
533 | writeIndexMap(engine: &engine, indexMap); |
534 | |
535 | writer.endTransaction(); |
536 | emit indexingFinished(); |
537 | } |
538 | |
} // namespace qt
540 | } // namespace fulltextsearch |
541 | |
542 | QT_END_NAMESPACE |
543 | |