/*
    This file is part of the KDE Baloo Project
    SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>

    SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
*/
7 | |
8 | #include "basicindexingjob.h" |
9 | #include "termgenerator.h" |
10 | #include "idutils.h" |
11 | |
12 | #include <QStringList> |
13 | #include <QFile> |
14 | |
15 | #include <KFileMetaData/Types> |
16 | #include <KFileMetaData/UserMetaData> |
17 | |
18 | using namespace Baloo; |
19 | |
20 | BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype, |
21 | IndexingLevel level) |
22 | : m_filePath(filePath) |
23 | , m_mimetype(mimetype) |
24 | , m_indexingLevel(level) |
25 | { |
26 | if (m_filePath.endsWith(c: QLatin1Char('/'))) { |
27 | m_filePath.chop(n: 1); |
28 | } |
29 | } |
30 | |
31 | namespace { |
32 | |
33 | void indexXAttr(const QString& url, Document& doc) |
34 | { |
35 | KFileMetaData::UserMetaData userMetaData(url); |
36 | |
37 | using Attribute = KFileMetaData::UserMetaData::Attribute; |
38 | auto attributes = userMetaData.queryAttributes(attributes: Attribute::Tags | |
39 | Attribute::Rating | Attribute::Comment); |
40 | if (attributes == Attribute::None) { |
41 | return; |
42 | } |
43 | |
44 | TermGenerator tg(doc); |
45 | |
46 | const QStringList tags = userMetaData.tags(); |
47 | for (const QString& tag : tags) { |
48 | tg.indexXattrText(text: tag, prefix: QByteArray("TA" )); |
49 | doc.addXattrTerm(term: QByteArray("TAG-" ) + tag.toUtf8()); |
50 | } |
51 | |
52 | int rating = userMetaData.rating(); |
53 | if (rating) { |
54 | doc.addXattrTerm(term: QByteArray("R" ) + QByteArray::number(rating)); |
55 | } |
56 | |
57 | QString = userMetaData.userComment(); |
58 | if (!comment.isEmpty()) { |
59 | tg.indexXattrText(text: comment, prefix: QByteArray("C" )); |
60 | } |
61 | } |
62 | |
63 | QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType) |
64 | { |
65 | using namespace KFileMetaData; |
66 | QVector<Type::Type> types; |
67 | types.reserve(asize: 2); |
68 | |
69 | // Basic types |
70 | if (mimeType.startsWith(s: QLatin1String("audio/" ))) { |
71 | types << Type::Audio; |
72 | } |
73 | if (mimeType.startsWith(s: QLatin1String("video/" ))) { |
74 | types << Type::Video; |
75 | } |
76 | if (mimeType.startsWith(s: QLatin1String("image/" ))) { |
77 | types << Type::Image; |
78 | } |
79 | if (mimeType.startsWith(s: QLatin1String("text/" ))) { |
80 | types << Type::Text; |
81 | } |
82 | if (mimeType.contains(s: QLatin1String("document" ))) { |
83 | types << Type::Document; |
84 | } |
85 | |
86 | if (mimeType.contains(s: QLatin1String("powerpoint" ))) { |
87 | types << Type::Presentation; |
88 | types << Type::Document; |
89 | } |
90 | if (mimeType.contains(s: QLatin1String("excel" ))) { |
91 | types << Type::Spreadsheet; |
92 | types << Type::Document; |
93 | } |
94 | // Compressed tar archives: "application/x-<compression>-compressed-tar" |
95 | if ((mimeType.startsWith(s: QLatin1String("application/x-" ))) && |
96 | (mimeType.endsWith(s: QLatin1String("-compressed-tar" )))) { |
97 | types << Type::Archive; |
98 | } |
99 | |
100 | static QMultiHash<QString, Type::Type> typeMapper { |
101 | {QStringLiteral("text/plain" ), Type::Document}, |
102 | // MS Office |
103 | {QStringLiteral("application/msword" ), Type::Document}, |
104 | {QStringLiteral("application/x-scribus" ), Type::Document}, |
105 | // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above: |
106 | // - application/vnd.ms-powerpoint |
107 | // - application/vnd.ms-excel |
108 | // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document |
109 | // - application/vnd.openxmlformats-officedocument.wordprocessingml.document |
110 | // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet |
111 | // - application/vnd.openxmlformats-officedocument.presentationml.presentation |
112 | // - application/vnd.oasis.opendocument.text |
113 | // - application/vnd.oasis.opendocument.spreadsheet |
114 | // - application/vnd.oasis.opendocument.presentation |
115 | // Office 2007 |
116 | {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation" ), Type::Presentation}, |
117 | {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow" ), Type::Presentation}, |
118 | {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template" ), Type::Presentation}, |
119 | {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ), Type::Spreadsheet}, |
120 | // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification |
121 | {QStringLiteral("application/vnd.oasis.opendocument.presentation" ), Type::Presentation}, |
122 | {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet" ), Type::Spreadsheet}, |
123 | {QStringLiteral("application/pdf" ), Type::Document}, |
124 | {QStringLiteral("application/postscript" ), Type::Document}, |
125 | {QStringLiteral("application/x-dvi" ), Type::Document}, |
126 | {QStringLiteral("application/rtf" ), Type::Document}, |
127 | // EBooks |
128 | {QStringLiteral("application/epub+zip" ), Type::Document}, |
129 | {QStringLiteral("application/vnd.amazon.mobi8-ebook" ), Type::Document}, |
130 | {QStringLiteral("application/x-mobipocket-ebook" ), Type::Document}, |
131 | // Graphic EBooks |
132 | {QStringLiteral("application/vnd.comicbook-rar" ), Type::Document}, |
133 | {QStringLiteral("application/vnd.comicbook+zip" ), Type::Document}, |
134 | {QStringLiteral("application/x-cb7" ), Type::Document}, |
135 | {QStringLiteral("application/x-cbt" ), Type::Document}, |
136 | // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats |
137 | {QStringLiteral("application/gzip" ), Type::Archive}, |
138 | {QStringLiteral("application/x-tar" ), Type::Archive}, |
139 | {QStringLiteral("application/x-tarz" ), Type::Archive}, |
140 | {QStringLiteral("application/x-arc" ), Type::Archive}, |
141 | {QStringLiteral("application/x-archive" ), Type::Archive}, |
142 | {QStringLiteral("application/x-bzip" ), Type::Archive}, |
143 | {QStringLiteral("application/x-cpio" ), Type::Archive}, |
144 | {QStringLiteral("application/x-lha" ), Type::Archive}, |
145 | {QStringLiteral("application/x-lhz" ), Type::Archive}, |
146 | {QStringLiteral("application/x-lrzip" ), Type::Archive}, |
147 | {QStringLiteral("application/x-lz4" ), Type::Archive}, |
148 | {QStringLiteral("application/x-lzip" ), Type::Archive}, |
149 | {QStringLiteral("application/x-lzma" ), Type::Archive}, |
150 | {QStringLiteral("application/x-lzop" ), Type::Archive}, |
151 | {QStringLiteral("application/x-7z-compressed" ), Type::Archive}, |
152 | {QStringLiteral("application/x-ace" ), Type::Archive}, |
153 | {QStringLiteral("application/x-astrotite-afa" ), Type::Archive}, |
154 | {QStringLiteral("application/x-alz" ), Type::Archive}, |
155 | {QStringLiteral("application/vnd.android.package-archive" ), Type::Archive}, |
156 | {QStringLiteral("application/x-arj" ), Type::Archive}, |
157 | {QStringLiteral("application/vnd.ms-cab-compressed" ), Type::Archive}, |
158 | {QStringLiteral("application/x-cfs-compressed" ), Type::Archive}, |
159 | {QStringLiteral("application/x-dar" ), Type::Archive}, |
160 | {QStringLiteral("application/x-lzh" ), Type::Archive}, |
161 | {QStringLiteral("application/x-lzx" ), Type::Archive}, |
162 | {QStringLiteral("application/vnd.rar" ), Type::Archive}, |
163 | {QStringLiteral("application/x-stuffit" ), Type::Archive}, |
164 | {QStringLiteral("application/x-stuffitx" ), Type::Archive}, |
165 | {QStringLiteral("application/x-tzo" ), Type::Archive}, |
166 | {QStringLiteral("application/x-ustar" ), Type::Archive}, |
167 | {QStringLiteral("application/x-xar" ), Type::Archive}, |
168 | {QStringLiteral("application/x-xz" ), Type::Archive}, |
169 | {QStringLiteral("application/x-zoo" ), Type::Archive}, |
170 | {QStringLiteral("application/zip" ), Type::Archive}, |
171 | {QStringLiteral("application/zlib" ), Type::Archive}, |
172 | {QStringLiteral("application/zstd" ), Type::Archive}, |
173 | // WPS office |
174 | {QStringLiteral("application/wps-office.doc" ), Type::Document}, |
175 | {QStringLiteral("application/wps-office.xls" ), Type::Document}, |
176 | {QStringLiteral("application/wps-office.xls" ), Type::Spreadsheet}, |
177 | {QStringLiteral("application/wps-office.pot" ), Type::Document}, |
178 | {QStringLiteral("application/wps-office.pot" ), Type::Presentation}, |
179 | {QStringLiteral("application/wps-office.wps" ), Type::Document}, |
180 | {QStringLiteral("application/wps-office.docx" ), Type::Document}, |
181 | {QStringLiteral("application/wps-office.xlsx" ), Type::Document}, |
182 | {QStringLiteral("application/wps-office.xlsx" ), Type::Spreadsheet}, |
183 | {QStringLiteral("application/wps-office.pptx" ), Type::Document}, |
184 | {QStringLiteral("application/wps-office.pptx" ), Type::Presentation}, |
185 | // Other |
186 | {QStringLiteral("text/markdown" ), Type::Document}, |
187 | {QStringLiteral("image/vnd.djvu+multipage" ), Type::Document}, |
188 | {QStringLiteral("application/x-lyx" ), Type::Document} |
189 | }; |
190 | |
191 | auto hashIt = typeMapper.find(key: mimeType); |
192 | while (hashIt != typeMapper.end() && hashIt.key() == mimeType) { |
193 | types.append(t: hashIt.value()); |
194 | ++hashIt; |
195 | } |
196 | |
197 | return types; |
198 | } |
199 | } // namespace |
200 | |
201 | BasicIndexingJob::~BasicIndexingJob() |
202 | { |
203 | } |
204 | |
205 | bool BasicIndexingJob::index() |
206 | { |
207 | const QByteArray url = QFile::encodeName(fileName: m_filePath); |
208 | auto lastSlash = url.lastIndexOf(c: '/'); |
209 | |
210 | const QByteArray fileName = url.mid(index: lastSlash + 1); |
211 | const QByteArray filePath = url.left(len: lastSlash); |
212 | |
213 | QT_STATBUF statBuf; |
214 | if (filePathToStat(filePath, statBuf) != 0) { |
215 | return false; |
216 | } |
217 | |
218 | Document doc; |
219 | doc.setParentId(statBufToId(stBuf: statBuf)); |
220 | |
221 | if (filePathToStat(filePath: url, statBuf) != 0) { |
222 | return false; |
223 | } |
224 | doc.setId(statBufToId(stBuf: statBuf)); |
225 | doc.setUrl(url); |
226 | |
227 | TermGenerator tg(doc); |
228 | tg.indexFileNameText(text: QFile::decodeName(localFileName: fileName)); |
229 | if (statBuf.st_size == 0) { |
230 | tg.indexText(QStringLiteral("application/x-zerosize" ), prefix: QByteArray("M" )); |
231 | } else { |
232 | tg.indexText(text: m_mimetype, prefix: QByteArray("M" )); |
233 | } |
234 | |
235 | // (Content) Modification time, Metadata (e.g. XAttr) change time |
236 | doc.setMTime(statBuf.st_mtime); |
237 | doc.setCTime(statBuf.st_ctime); |
238 | |
239 | if (S_ISDIR(statBuf.st_mode)) { |
240 | static const QByteArray type = QByteArray("T" ) + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder)); |
241 | doc.addTerm(term: type); |
242 | // For folders we do not need to go through file indexing, so we do not set contentIndexing |
243 | |
244 | } else if (statBuf.st_size > 0) { |
245 | if (m_indexingLevel == MarkForContentIndexing) { |
246 | doc.setContentIndexing(true); |
247 | } |
248 | // Types |
249 | const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(mimeType: m_mimetype); |
250 | for (KFileMetaData::Type::Type type : tList) { |
251 | QByteArray num = QByteArray::number(static_cast<int>(type)); |
252 | doc.addTerm(term: QByteArray("T" ) + num); |
253 | } |
254 | } |
255 | |
256 | indexXAttr(url: m_filePath, doc); |
257 | |
258 | m_doc = doc; |
259 | return true; |
260 | } |
261 | |