1 | /* |
2 | SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "config-localedata.h" |
8 | |
9 | #include "isocodes_p.h" |
10 | #include "isocodescache_p.h" |
11 | #include "logging.h" |
12 | |
13 | #include <QDir> |
14 | #include <QFile> |
15 | #include <QFileInfo> |
16 | #include <QJsonArray> |
17 | #include <QJsonDocument> |
18 | #include <QJsonObject> |
19 | #include <QStandardPaths> |
20 | |
21 | using namespace Qt::Literals; |
22 | |
// Magic numbers written at the start of the binary cache files and checked when loading them.
// Increment those when changing the format.
// The upper three bytes are ASCII "KI1" / "KI2"; the lowest byte presumably acts as the
// format revision counter.
enum : uint32_t {
    Iso3166_1CacheHeader = 0x4B493102,
    Iso3166_2CacheHeader = 0x4B493201,
};
28 | |
29 | static QString isoCodesPath(QStringView file) |
30 | { |
31 | #ifndef Q_OS_ANDROID |
32 | auto path = QStandardPaths::locate(type: QStandardPaths::GenericDataLocation, fileName: QLatin1String("iso-codes/json/" ) + file, options: QStandardPaths::LocateFile); |
33 | if (!path.isEmpty()) { |
34 | return path; |
35 | } |
36 | |
37 | // search manually in the compile-time determined prefix |
38 | // needed for example for non-installed Windows binaries to work, such as unit tests |
39 | for (const char *installLocation : {"/share" , "/bin/data" }) { |
40 | path = QLatin1String(ISO_CODES_PREFIX) + QLatin1String(installLocation) + QLatin1String("/iso-codes/json/" ) + file; |
41 | if (QFileInfo::exists(file: path)) { |
42 | return path; |
43 | } |
44 | } |
45 | |
46 | return {}; |
47 | #else |
48 | return QLatin1String("assets:/share/iso-codes/json/" ) + file; |
49 | #endif |
50 | } |
51 | |
52 | static QString cachePath() |
53 | { |
54 | return QStandardPaths::writableLocation(type: QStandardPaths::GenericCacheLocation) + QLatin1String("/org.kde.ki18n/iso-codes/" ); |
55 | } |
56 | |
57 | static QString cacheFilePath(QStringView file) |
58 | { |
59 | return cachePath() + file; |
60 | } |
61 | |
/**
 * Registers the embedded cache resource when the build provides one,
 * does nothing otherwise.
 */
static void initResources()
{
#if defined(HAVE_EMBEDDED_CACHE)
    Q_INIT_RESOURCE(isocodescache);
#endif
}
68 | |
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so the header does not need the complete QFile type
// for the cache file members - confirm against isocodescache_p.h.
IsoCodesCache::~IsoCodesCache() = default;
70 | |
71 | IsoCodesCache *IsoCodesCache::instance() |
72 | { |
73 | static IsoCodesCache s_cache; |
74 | return &s_cache; |
75 | } |
76 | |
77 | void IsoCodesCache::loadIso3166_1() |
78 | { |
79 | if (!m_iso3166_1CacheData && !loadIso3166_1Cache()) { |
80 | QDir().mkpath(dirPath: cachePath()); |
81 | createIso3166_1Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-1.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-1" )); |
82 | loadIso3166_1Cache(); |
83 | } |
84 | } |
85 | |
86 | static std::unique_ptr<QFile> openCacheFile(QStringView cacheFileName, QStringView isoCodesFileName) |
87 | { |
88 | QFileInfo jsonFi(isoCodesPath(file: isoCodesFileName)); |
89 | if (!jsonFi.exists()) { // no source file means we can only use an embedded cache |
90 | initResources(); |
91 | auto f = std::make_unique<QFile>(args: QLatin1String(":/org.kde.ki18n/iso-codes/cache/" ) + cacheFileName); |
92 | if (!f->open(flags: QFile::ReadOnly) || f->size() < 8) { |
93 | return {}; |
94 | } |
95 | return f; |
96 | } |
97 | auto f = std::make_unique<QFile>(args: cacheFilePath(file: cacheFileName)); |
98 | if (!f->open(flags: QFile::ReadOnly) || f->fileTime(time: QFile::FileModificationTime) < jsonFi.lastModified() || f->size() < 8) { |
99 | return {}; |
100 | } |
101 | return f; |
102 | } |
103 | |
104 | bool IsoCodesCache::loadIso3166_1Cache() |
105 | { |
106 | auto f = openCacheFile(cacheFileName: u"iso_3166-1" , isoCodesFileName: u"iso_3166-1.json" ); |
107 | if (!f) { |
108 | return false; |
109 | } |
110 | m_iso3166_1CacheSize = f->size(); |
111 | |
112 | // validate cache file is usable |
113 | // header matches |
114 | const auto data = f->map(offset: 0, size: m_iso3166_1CacheSize); |
115 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_1CacheHeader) { |
116 | return false; |
117 | } |
118 | // lookup tables fit into the available size |
119 | const auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
120 | if (sizeof(Iso3166_1CacheHeader) + sizeof(size) + size * sizeof(MapEntry<uint16_t>) * 2 >= m_iso3166_1CacheSize) { |
121 | return false; |
122 | } |
123 | // string table is 0 terminated |
124 | if (data[m_iso3166_1CacheSize - 1] != '\0') { |
125 | return false; |
126 | } |
127 | |
128 | m_iso3166_1CacheFile = std::move(f); |
129 | m_iso3166_1CacheData = data; |
130 | return true; |
131 | } |
132 | |
133 | uint32_t IsoCodesCache::countryCount() const |
134 | { |
135 | return m_iso3166_1CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_1CacheData) + 1) : 0; |
136 | } |
137 | |
138 | const MapEntry<uint16_t> *IsoCodesCache::countryNameMapBegin() const |
139 | { |
140 | return m_iso3166_1CacheData ? reinterpret_cast<const MapEntry<uint16_t> *>(m_iso3166_1CacheData + sizeof(uint32_t) * 2) : nullptr; |
141 | } |
142 | |
143 | const MapEntry<uint16_t> *IsoCodesCache::countryAlpha3MapBegin() const |
144 | { |
145 | return m_iso3166_1CacheData ? countryNameMapBegin() + countryCount() : nullptr; |
146 | } |
147 | |
148 | const char *IsoCodesCache::countryStringTableLookup(uint16_t offset) const |
149 | { |
150 | if (m_iso3166_1CacheData) { |
151 | const auto pos = offset + 2 * sizeof(uint32_t) + 2 * countryCount() * sizeof(MapEntry<uint16_t>); |
152 | return m_iso3166_1CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_1CacheData + pos) : nullptr; |
153 | } |
154 | return nullptr; |
155 | } |
156 | |
157 | [[nodiscard]] static QByteArray nameForIso3166_1(const QJsonObject &entry) |
158 | { |
159 | if (const auto commonName = entry.value(key: "common_name"_L1 ).toString(); !commonName.isEmpty()) { |
160 | return commonName.toUtf8(); |
161 | } |
162 | return entry.value(key: "name"_L1 ).toString().toUtf8(); |
163 | } |
164 | |
165 | void IsoCodesCache::createIso3166_1Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
166 | { |
167 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-1 cache" ; |
168 | |
169 | QFile file(isoCodesPath); |
170 | if (!file.open(flags: QFile::ReadOnly)) { |
171 | qCWarning(KI18NLD) << "Unable to open iso_3166-1.json" << isoCodesPath << file.errorString(); |
172 | return; |
173 | } |
174 | |
175 | std::vector<MapEntry<uint16_t>> alpha2NameMap; |
176 | std::vector<MapEntry<uint16_t>> alpha3alpha2Map; |
177 | QByteArray iso3166_1stringTable; |
178 | |
179 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
180 | const auto array = doc.object().value(key: QLatin1String("3166-1" )).toArray(); |
181 | for (const auto &entryVal : array) { |
182 | const auto entry = entryVal.toObject(); |
183 | const auto alpha2 = entry.value(key: QLatin1String("alpha_2" )).toString(); |
184 | if (alpha2.size() != 2) { |
185 | continue; |
186 | } |
187 | const auto alpha2Key = IsoCodes::alpha2CodeToKey(code: alpha2); |
188 | |
189 | assert(std::numeric_limits<uint16_t>::max() > iso3166_1stringTable.size()); |
190 | alpha2NameMap.push_back(x: {.key: alpha2Key, .value: (uint16_t)iso3166_1stringTable.size()}); |
191 | iso3166_1stringTable.append(a: nameForIso3166_1(entry)); |
192 | iso3166_1stringTable.append(c: '\0'); |
193 | |
194 | const auto alpha3Key = IsoCodes::alpha3CodeToKey(code: entry.value(key: QLatin1String("alpha_3" )).toString()); |
195 | alpha3alpha2Map.push_back(x: {.key: alpha3Key, .value: alpha2Key}); |
196 | } |
197 | |
198 | std::sort(first: alpha2NameMap.begin(), last: alpha2NameMap.end()); |
199 | std::sort(first: alpha3alpha2Map.begin(), last: alpha3alpha2Map.end()); |
200 | |
201 | // write out binary cache file |
202 | QFile cache(cacheFilePath); |
203 | if (!cache.open(flags: QFile::WriteOnly)) { |
204 | qCWarning(KI18NLD) << "Failed to write ISO 3166-1 cache:" << cache.errorString() << cache.fileName(); |
205 | return; |
206 | } |
207 | |
208 | uint32_t n = Iso3166_1CacheHeader; |
209 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
210 | n = alpha2NameMap.size(); |
211 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size |
212 | for (auto entry : alpha2NameMap) { |
213 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
214 | } |
215 | for (auto entry : alpha3alpha2Map) { |
216 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
217 | } |
218 | cache.write(data: iso3166_1stringTable); |
219 | } |
220 | |
221 | void IsoCodesCache::loadIso3166_2() |
222 | { |
223 | if (!m_iso3166_2CacheData && !loadIso3166_2Cache()) { |
224 | QDir().mkpath(dirPath: cachePath()); |
225 | createIso3166_2Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-2.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-2" )); |
226 | loadIso3166_2Cache(); |
227 | } |
228 | } |
229 | |
230 | bool IsoCodesCache::loadIso3166_2Cache() |
231 | { |
232 | auto f = openCacheFile(cacheFileName: u"iso_3166-2" , isoCodesFileName: u"iso_3166-2.json" ); |
233 | if (!f) { |
234 | return false; |
235 | } |
236 | m_iso3166_2CacheSize = f->size(); |
237 | |
238 | // validate cache file is usable |
239 | // header matches |
240 | const auto data = f->map(offset: 0, size: m_iso3166_2CacheSize); |
241 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_2CacheHeader) { |
242 | return false; |
243 | } |
244 | // name lookup table fits into the available size |
245 | auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
246 | auto offset = 3 * sizeof(uint32_t) + size * sizeof(MapEntry<uint32_t>); |
247 | if (offset >= m_iso3166_2CacheSize) { |
248 | return false; |
249 | } |
250 | // hierarchy map boundary check |
251 | size = *(reinterpret_cast<const uint32_t *>(data + offset) - 1); |
252 | offset += size * sizeof(MapEntry<uint32_t>); |
253 | if (offset >= m_iso3166_2CacheSize) { |
254 | return false; |
255 | } |
256 | // string table is 0 terminated |
257 | if (data[m_iso3166_2CacheSize - 1] != '\0') { |
258 | return false; |
259 | } |
260 | |
261 | m_iso3166_2CacheFile = std::move(f); |
262 | m_iso3166_2CacheData = data; |
263 | return true; |
264 | } |
265 | |
266 | uint32_t IsoCodesCache::subdivisionCount() const |
267 | { |
268 | return m_iso3166_2CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData) + 1) : 0; |
269 | } |
270 | |
271 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionNameMapBegin() const |
272 | { |
273 | return m_iso3166_2CacheData ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t)) : nullptr; |
274 | } |
275 | |
276 | uint32_t IsoCodesCache::subdivisionHierachyMapSize() const |
277 | { |
278 | return m_iso3166_2CacheData |
279 | ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>))) |
280 | : 0; |
281 | } |
282 | |
283 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionParentMapBegin() const |
284 | { |
285 | return m_iso3166_2CacheData |
286 | ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 3 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>)) |
287 | : nullptr; |
288 | } |
289 | |
290 | const char *IsoCodesCache::subdivisionStringTableLookup(uint16_t offset) const |
291 | { |
292 | if (m_iso3166_2CacheData) { |
293 | const auto pos = offset + 3 * sizeof(uint32_t) + (subdivisionCount() + subdivisionHierachyMapSize()) * sizeof(MapEntry<uint32_t>); |
294 | return m_iso3166_2CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_2CacheData + pos) : nullptr; |
295 | } |
296 | return nullptr; |
297 | } |
298 | |
299 | void IsoCodesCache::createIso3166_2Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
300 | { |
301 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-2 cache" ; |
302 | QFile file(isoCodesPath); |
303 | if (!file.open(flags: QFile::ReadOnly)) { |
304 | qCWarning(KI18NLD) << "Unable to open iso_3166-2.json" << isoCodesPath << file.errorString(); |
305 | return; |
306 | } |
307 | |
308 | std::vector<MapEntry<uint32_t>> subdivNameMap; |
309 | std::vector<MapEntry<uint32_t>> subdivParentMap; |
310 | QByteArray iso3166_2stringTable; |
311 | |
312 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
313 | const auto array = doc.object().value(key: QLatin1String("3166-2" )).toArray(); |
314 | for (const auto &entryVal : array) { |
315 | const auto entry = entryVal.toObject(); |
316 | const auto key = IsoCodes::subdivisionCodeToKey(code: entry.value(key: QLatin1String("code" )).toString()); |
317 | |
318 | assert(std::numeric_limits<uint16_t>::max() > iso3166_2stringTable.size()); |
319 | subdivNameMap.push_back(x: {.key: key, .value: (uint16_t)iso3166_2stringTable.size()}); |
320 | iso3166_2stringTable.append(a: entry.value(key: QLatin1String("name" )).toString().toUtf8()); |
321 | iso3166_2stringTable.append(c: '\0'); |
322 | |
323 | const auto parentKey = IsoCodes::parentCodeToKey(code: entry.value(key: QLatin1String("parent" )).toString()); |
324 | if (parentKey) { |
325 | subdivParentMap.push_back(x: {.key: key, .value: parentKey}); |
326 | } |
327 | } |
328 | |
329 | std::sort(first: subdivNameMap.begin(), last: subdivNameMap.end()); |
330 | std::sort(first: subdivParentMap.begin(), last: subdivParentMap.end()); |
331 | |
332 | // write out binary cache file |
333 | QFile cache(cacheFilePath); |
334 | if (!cache.open(flags: QFile::WriteOnly)) { |
335 | qCWarning(KI18NLD) << "Failed to write ISO 3166-2 cache:" << cache.errorString() << cache.fileName(); |
336 | return; |
337 | } |
338 | |
339 | uint32_t n = Iso3166_2CacheHeader; |
340 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
341 | n = subdivNameMap.size(); |
342 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the name map |
343 | for (auto entry : subdivNameMap) { |
344 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
345 | } |
346 | n = subdivParentMap.size(); |
347 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the hierarchy map |
348 | for (auto entry : subdivParentMap) { |
349 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
350 | } |
351 | cache.write(data: iso3166_2stringTable); |
352 | } |
353 | |