1/*
2 This file is part of the KDE Baloo Project
3 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>
4
5 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6*/
7
8#include "basicindexingjob.h"
9#include "termgenerator.h"
10#include "idutils.h"
11
12#include <QStringList>
13#include <QFile>
14
15#include <KFileMetaData/Types>
16#include <KFileMetaData/UserMetaData>
17
18using namespace Baloo;
19
20BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
21 IndexingLevel level)
22 : m_filePath(filePath)
23 , m_mimetype(mimetype)
24 , m_indexingLevel(level)
25{
26 if (m_filePath.endsWith(c: QLatin1Char('/'))) {
27 m_filePath.chop(n: 1);
28 }
29}
30
31namespace {
32
33void indexXAttr(const QString& url, Document& doc)
34{
35 KFileMetaData::UserMetaData userMetaData(url);
36
37 using Attribute = KFileMetaData::UserMetaData::Attribute;
38 auto attributes = userMetaData.queryAttributes(attributes: Attribute::Tags |
39 Attribute::Rating | Attribute::Comment);
40 if (attributes == Attribute::None) {
41 return;
42 }
43
44 TermGenerator tg(doc);
45
46 const QStringList tags = userMetaData.tags();
47 for (const QString& tag : tags) {
48 tg.indexXattrText(text: tag, prefix: QByteArray("TA"));
49 doc.addXattrTerm(term: QByteArray("TAG-") + tag.toUtf8());
50 }
51
52 int rating = userMetaData.rating();
53 if (rating) {
54 doc.addXattrTerm(term: QByteArray("R") + QByteArray::number(rating));
55 }
56
57 QString comment = userMetaData.userComment();
58 if (!comment.isEmpty()) {
59 tg.indexXattrText(text: comment, prefix: QByteArray("C"));
60 }
61}
62
63QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
64{
65 using namespace KFileMetaData;
66 QVector<Type::Type> types;
67 types.reserve(asize: 2);
68
69 // Basic types
70 if (mimeType.startsWith(s: QLatin1String("audio/"))) {
71 types << Type::Audio;
72 }
73 if (mimeType.startsWith(s: QLatin1String("video/"))) {
74 types << Type::Video;
75 }
76 if (mimeType.startsWith(s: QLatin1String("image/"))) {
77 types << Type::Image;
78 }
79 if (mimeType.startsWith(s: QLatin1String("text/"))) {
80 types << Type::Text;
81 }
82 if (mimeType.contains(s: QLatin1String("document"))) {
83 types << Type::Document;
84 }
85
86 if (mimeType.contains(s: QLatin1String("powerpoint"))) {
87 types << Type::Presentation;
88 types << Type::Document;
89 }
90 if (mimeType.contains(s: QLatin1String("excel"))) {
91 types << Type::Spreadsheet;
92 types << Type::Document;
93 }
94 // Compressed tar archives: "application/x-<compression>-compressed-tar"
95 if ((mimeType.startsWith(s: QLatin1String("application/x-"))) &&
96 (mimeType.endsWith(s: QLatin1String("-compressed-tar")))) {
97 types << Type::Archive;
98 }
99
100 static QMultiHash<QString, Type::Type> typeMapper {
101 {QStringLiteral("text/plain"), Type::Document},
102 // MS Office
103 {QStringLiteral("application/msword"), Type::Document},
104 {QStringLiteral("application/x-scribus"), Type::Document},
105 // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
106 // - application/vnd.ms-powerpoint
107 // - application/vnd.ms-excel
108 // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
109 // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
110 // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
111 // - application/vnd.openxmlformats-officedocument.presentationml.presentation
112 // - application/vnd.oasis.opendocument.text
113 // - application/vnd.oasis.opendocument.spreadsheet
114 // - application/vnd.oasis.opendocument.presentation
115 // Office 2007
116 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
117 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
118 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
119 {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
120 // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
121 {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
122 {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
123 {QStringLiteral("application/pdf"), Type::Document},
124 {QStringLiteral("application/postscript"), Type::Document},
125 {QStringLiteral("application/x-dvi"), Type::Document},
126 {QStringLiteral("application/rtf"), Type::Document},
127 // EBooks
128 {QStringLiteral("application/epub+zip"), Type::Document},
129 {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
130 {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
131 // Graphic EBooks
132 {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
133 {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
134 {QStringLiteral("application/x-cb7"), Type::Document},
135 {QStringLiteral("application/x-cbt"), Type::Document},
136 // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
137 {QStringLiteral("application/gzip"), Type::Archive},
138 {QStringLiteral("application/x-tar"), Type::Archive},
139 {QStringLiteral("application/x-tarz"), Type::Archive},
140 {QStringLiteral("application/x-arc"), Type::Archive},
141 {QStringLiteral("application/x-archive"), Type::Archive},
142 {QStringLiteral("application/x-bzip"), Type::Archive},
143 {QStringLiteral("application/x-cpio"), Type::Archive},
144 {QStringLiteral("application/x-lha"), Type::Archive},
145 {QStringLiteral("application/x-lhz"), Type::Archive},
146 {QStringLiteral("application/x-lrzip"), Type::Archive},
147 {QStringLiteral("application/x-lz4"), Type::Archive},
148 {QStringLiteral("application/x-lzip"), Type::Archive},
149 {QStringLiteral("application/x-lzma"), Type::Archive},
150 {QStringLiteral("application/x-lzop"), Type::Archive},
151 {QStringLiteral("application/x-7z-compressed"), Type::Archive},
152 {QStringLiteral("application/x-ace"), Type::Archive},
153 {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
154 {QStringLiteral("application/x-alz"), Type::Archive},
155 {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
156 {QStringLiteral("application/x-arj"), Type::Archive},
157 {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
158 {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
159 {QStringLiteral("application/x-dar"), Type::Archive},
160 {QStringLiteral("application/x-lzh"), Type::Archive},
161 {QStringLiteral("application/x-lzx"), Type::Archive},
162 {QStringLiteral("application/vnd.rar"), Type::Archive},
163 {QStringLiteral("application/x-stuffit"), Type::Archive},
164 {QStringLiteral("application/x-stuffitx"), Type::Archive},
165 {QStringLiteral("application/x-tzo"), Type::Archive},
166 {QStringLiteral("application/x-ustar"), Type::Archive},
167 {QStringLiteral("application/x-xar"), Type::Archive},
168 {QStringLiteral("application/x-xz"), Type::Archive},
169 {QStringLiteral("application/x-zoo"), Type::Archive},
170 {QStringLiteral("application/zip"), Type::Archive},
171 {QStringLiteral("application/zlib"), Type::Archive},
172 {QStringLiteral("application/zstd"), Type::Archive},
173 // WPS office
174 {QStringLiteral("application/wps-office.doc"), Type::Document},
175 {QStringLiteral("application/wps-office.xls"), Type::Document},
176 {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
177 {QStringLiteral("application/wps-office.pot"), Type::Document},
178 {QStringLiteral("application/wps-office.pot"), Type::Presentation},
179 {QStringLiteral("application/wps-office.wps"), Type::Document},
180 {QStringLiteral("application/wps-office.docx"), Type::Document},
181 {QStringLiteral("application/wps-office.xlsx"), Type::Document},
182 {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
183 {QStringLiteral("application/wps-office.pptx"), Type::Document},
184 {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
185 // Other
186 {QStringLiteral("text/markdown"), Type::Document},
187 {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
188 {QStringLiteral("application/x-lyx"), Type::Document}
189 };
190
191 auto hashIt = typeMapper.find(key: mimeType);
192 while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
193 types.append(t: hashIt.value());
194 ++hashIt;
195 }
196
197 return types;
198}
199} // namespace
200
201BasicIndexingJob::~BasicIndexingJob()
202{
203}
204
205bool BasicIndexingJob::index()
206{
207 const QByteArray url = QFile::encodeName(fileName: m_filePath);
208 auto lastSlash = url.lastIndexOf(c: '/');
209
210 const QByteArray fileName = url.mid(index: lastSlash + 1);
211 const QByteArray filePath = url.left(len: lastSlash);
212
213 QT_STATBUF statBuf;
214 if (filePathToStat(filePath, statBuf) != 0) {
215 return false;
216 }
217
218 Document doc;
219 doc.setParentId(statBufToId(stBuf: statBuf));
220
221 if (filePathToStat(filePath: url, statBuf) != 0) {
222 return false;
223 }
224 doc.setId(statBufToId(stBuf: statBuf));
225 doc.setUrl(url);
226
227 TermGenerator tg(doc);
228 tg.indexFileNameText(text: QFile::decodeName(localFileName: fileName));
229 if (statBuf.st_size == 0) {
230 tg.indexText(QStringLiteral("application/x-zerosize"), prefix: QByteArray("M"));
231 } else {
232 tg.indexText(text: m_mimetype, prefix: QByteArray("M"));
233 }
234
235 // (Content) Modification time, Metadata (e.g. XAttr) change time
236 doc.setMTime(statBuf.st_mtime);
237 doc.setCTime(statBuf.st_ctime);
238
239 if (S_ISDIR(statBuf.st_mode)) {
240 static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
241 doc.addTerm(term: type);
242 // For folders we do not need to go through file indexing, so we do not set contentIndexing
243
244 } else if (statBuf.st_size > 0) {
245 if (m_indexingLevel == MarkForContentIndexing) {
246 doc.setContentIndexing(true);
247 }
248 // Types
249 const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(mimeType: m_mimetype);
250 for (KFileMetaData::Type::Type type : tList) {
251 QByteArray num = QByteArray::number(static_cast<int>(type));
252 doc.addTerm(term: QByteArray("T") + num);
253 }
254 }
255
256 indexXAttr(url: m_filePath, doc);
257
258 m_doc = doc;
259 return true;
260}
261

source code of baloo/src/file/basicindexingjob.cpp