1 | /* |
2 | SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "config-localedata.h" |
8 | |
9 | #include "isocodes_p.h" |
10 | #include "isocodescache_p.h" |
11 | #include "logging.h" |
12 | |
13 | #include <QDir> |
14 | #include <QFile> |
15 | #include <QFileInfo> |
16 | #include <QJsonArray> |
17 | #include <QJsonDocument> |
18 | #include <QJsonObject> |
19 | #include <QStandardPaths> |
20 | |
// Magic numbers stored in the first four bytes of each binary cache file.
// Increment these whenever the on-disk format changes, so that caches written
// in an older format fail the header check and get rebuilt.
enum : uint32_t {
    Iso3166_1CacheHeader = 0x4B493101,
    Iso3166_2CacheHeader = 0x4B493201,
};
26 | |
27 | static QString isoCodesPath(QStringView file) |
28 | { |
29 | #ifndef Q_OS_ANDROID |
30 | auto path = QStandardPaths::locate(type: QStandardPaths::GenericDataLocation, fileName: QLatin1String("iso-codes/json/" ) + file, options: QStandardPaths::LocateFile); |
31 | if (!path.isEmpty()) { |
32 | return path; |
33 | } |
34 | |
35 | // search manually in the compile-time determined prefix |
36 | // needed for example for non-installed Windows binaries to work, such as unit tests |
37 | for (const char *installLocation : {"/share" , "/bin/data" }) { |
38 | path = QLatin1String(ISO_CODES_PREFIX) + QLatin1String(installLocation) + QLatin1String("/iso-codes/json/" ) + file; |
39 | if (QFileInfo::exists(file: path)) { |
40 | return path; |
41 | } |
42 | } |
43 | |
44 | return {}; |
45 | #else |
46 | return QLatin1String("assets:/share/iso-codes/json/" ) + file; |
47 | #endif |
48 | } |
49 | |
50 | static QString cachePath() |
51 | { |
52 | return QStandardPaths::writableLocation(type: QStandardPaths::GenericCacheLocation) + QLatin1String("/org.kde.ki18n/iso-codes/" ); |
53 | } |
54 | |
55 | static QString cacheFilePath(QStringView file) |
56 | { |
57 | return cachePath() + file; |
58 | } |
59 | |
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably placed here so members such as the cache file
// handles are destroyed where QFile is a complete type - confirm against the header.
IsoCodesCache::~IsoCodesCache() = default;
61 | |
62 | IsoCodesCache *IsoCodesCache::instance() |
63 | { |
64 | static IsoCodesCache s_cache; |
65 | return &s_cache; |
66 | } |
67 | |
68 | void IsoCodesCache::loadIso3166_1() |
69 | { |
70 | if (!m_iso3166_1CacheData && !loadIso3166_1Cache()) { |
71 | QDir().mkpath(dirPath: cachePath()); |
72 | createIso3166_1Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-1.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-1" )); |
73 | loadIso3166_1Cache(); |
74 | } |
75 | } |
76 | |
77 | static std::unique_ptr<QFile> openCacheFile(QStringView cacheFileName, QStringView isoCodesFileName) |
78 | { |
79 | QFileInfo jsonFi(isoCodesPath(file: isoCodesFileName)); |
80 | if (!jsonFi.exists()) { // no source file means we can only use an embedded cache |
81 | auto f = std::make_unique<QFile>(args: QLatin1String(":/org.kde.ki18n/iso-codes/cache/" ) + cacheFileName); |
82 | if (!f->open(flags: QFile::ReadOnly) || f->size() < 8) { |
83 | return {}; |
84 | } |
85 | return f; |
86 | } |
87 | auto f = std::make_unique<QFile>(args: cacheFilePath(file: cacheFileName)); |
88 | if (!f->open(flags: QFile::ReadOnly) || f->fileTime(time: QFile::FileModificationTime) < jsonFi.lastModified() || f->size() < 8) { |
89 | return {}; |
90 | } |
91 | return f; |
92 | } |
93 | |
94 | bool IsoCodesCache::loadIso3166_1Cache() |
95 | { |
96 | auto f = openCacheFile(cacheFileName: u"iso_3166-1" , isoCodesFileName: u"iso_3166-1.json" ); |
97 | if (!f) { |
98 | return false; |
99 | } |
100 | m_iso3166_1CacheSize = f->size(); |
101 | |
102 | // validate cache file is usable |
103 | // header matches |
104 | const auto data = f->map(offset: 0, size: m_iso3166_1CacheSize); |
105 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_1CacheHeader) { |
106 | return false; |
107 | } |
108 | // lookup tables fit into the available size |
109 | const auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
110 | if (sizeof(Iso3166_1CacheHeader) + sizeof(size) + size * sizeof(MapEntry<uint16_t>) * 2 >= m_iso3166_1CacheSize) { |
111 | return false; |
112 | } |
113 | // string table is 0 terminated |
114 | if (data[m_iso3166_1CacheSize - 1] != '\0') { |
115 | return false; |
116 | } |
117 | |
118 | m_iso3166_1CacheFile = std::move(f); |
119 | m_iso3166_1CacheData = data; |
120 | return true; |
121 | } |
122 | |
123 | uint32_t IsoCodesCache::countryCount() const |
124 | { |
125 | return m_iso3166_1CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_1CacheData) + 1) : 0; |
126 | } |
127 | |
128 | const MapEntry<uint16_t> *IsoCodesCache::countryNameMapBegin() const |
129 | { |
130 | return m_iso3166_1CacheData ? reinterpret_cast<const MapEntry<uint16_t> *>(m_iso3166_1CacheData + sizeof(uint32_t) * 2) : nullptr; |
131 | } |
132 | |
133 | const MapEntry<uint16_t> *IsoCodesCache::countryAlpha3MapBegin() const |
134 | { |
135 | return m_iso3166_1CacheData ? countryNameMapBegin() + countryCount() : nullptr; |
136 | } |
137 | |
138 | const char *IsoCodesCache::countryStringTableLookup(uint16_t offset) const |
139 | { |
140 | if (m_iso3166_1CacheData) { |
141 | const auto pos = offset + 2 * sizeof(uint32_t) + 2 * countryCount() * sizeof(MapEntry<uint16_t>); |
142 | return m_iso3166_1CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_1CacheData + pos) : nullptr; |
143 | } |
144 | return nullptr; |
145 | } |
146 | |
147 | void IsoCodesCache::createIso3166_1Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
148 | { |
149 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-1 cache" ; |
150 | |
151 | QFile file(isoCodesPath); |
152 | if (!file.open(flags: QFile::ReadOnly)) { |
153 | qCWarning(KI18NLD) << "Unable to open iso_3166-1.json" << isoCodesPath << file.errorString(); |
154 | return; |
155 | } |
156 | |
157 | std::vector<MapEntry<uint16_t>> alpha2NameMap; |
158 | std::vector<MapEntry<uint16_t>> alpha3alpha2Map; |
159 | QByteArray iso3166_1stringTable; |
160 | |
161 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
162 | const auto array = doc.object().value(key: QLatin1String("3166-1" )).toArray(); |
163 | for (const auto &entryVal : array) { |
164 | const auto entry = entryVal.toObject(); |
165 | const auto alpha2 = entry.value(key: QLatin1String("alpha_2" )).toString(); |
166 | if (alpha2.size() != 2) { |
167 | continue; |
168 | } |
169 | const auto alpha2Key = IsoCodes::alpha2CodeToKey(code: alpha2); |
170 | |
171 | assert(std::numeric_limits<uint16_t>::max() > iso3166_1stringTable.size()); |
172 | alpha2NameMap.push_back(x: {.key: alpha2Key, .value: (uint16_t)iso3166_1stringTable.size()}); |
173 | iso3166_1stringTable.append(a: entry.value(key: QLatin1String("name" )).toString().toUtf8()); |
174 | iso3166_1stringTable.append(c: '\0'); |
175 | |
176 | const auto alpha3Key = IsoCodes::alpha3CodeToKey(code: entry.value(key: QLatin1String("alpha_3" )).toString()); |
177 | alpha3alpha2Map.push_back(x: {.key: alpha3Key, .value: alpha2Key}); |
178 | } |
179 | |
180 | std::sort(first: alpha2NameMap.begin(), last: alpha2NameMap.end()); |
181 | std::sort(first: alpha3alpha2Map.begin(), last: alpha3alpha2Map.end()); |
182 | |
183 | // write out binary cache file |
184 | QFile cache(cacheFilePath); |
185 | if (!cache.open(flags: QFile::WriteOnly)) { |
186 | qCWarning(KI18NLD) << "Failed to write ISO 3166-1 cache:" << cache.errorString() << cache.fileName(); |
187 | return; |
188 | } |
189 | |
190 | uint32_t n = Iso3166_1CacheHeader; |
191 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
192 | n = alpha2NameMap.size(); |
193 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size |
194 | for (auto entry : alpha2NameMap) { |
195 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
196 | } |
197 | for (auto entry : alpha3alpha2Map) { |
198 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
199 | } |
200 | cache.write(data: iso3166_1stringTable); |
201 | } |
202 | |
203 | void IsoCodesCache::loadIso3166_2() |
204 | { |
205 | if (!m_iso3166_2CacheData && !loadIso3166_2Cache()) { |
206 | QDir().mkpath(dirPath: cachePath()); |
207 | createIso3166_2Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-2.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-2" )); |
208 | loadIso3166_2Cache(); |
209 | } |
210 | } |
211 | |
212 | bool IsoCodesCache::loadIso3166_2Cache() |
213 | { |
214 | auto f = openCacheFile(cacheFileName: u"iso_3166-2" , isoCodesFileName: u"iso_3166-2.json" ); |
215 | if (!f) { |
216 | return false; |
217 | } |
218 | m_iso3166_2CacheSize = f->size(); |
219 | |
220 | // validate cache file is usable |
221 | // header matches |
222 | const auto data = f->map(offset: 0, size: m_iso3166_2CacheSize); |
223 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_2CacheHeader) { |
224 | return false; |
225 | } |
226 | // name lookup table fits into the available size |
227 | auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
228 | auto offset = 3 * sizeof(uint32_t) + size * sizeof(MapEntry<uint32_t>); |
229 | if (offset >= m_iso3166_2CacheSize) { |
230 | return false; |
231 | } |
232 | // hierarchy map boundary check |
233 | size = *(reinterpret_cast<const uint32_t *>(data + offset) - 1); |
234 | offset += size * sizeof(MapEntry<uint32_t>); |
235 | if (offset >= m_iso3166_2CacheSize) { |
236 | return false; |
237 | } |
238 | // string table is 0 terminated |
239 | if (data[m_iso3166_2CacheSize - 1] != '\0') { |
240 | return false; |
241 | } |
242 | |
243 | m_iso3166_2CacheFile = std::move(f); |
244 | m_iso3166_2CacheData = data; |
245 | return true; |
246 | } |
247 | |
248 | uint32_t IsoCodesCache::subdivisionCount() const |
249 | { |
250 | return m_iso3166_2CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData) + 1) : 0; |
251 | } |
252 | |
253 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionNameMapBegin() const |
254 | { |
255 | return m_iso3166_2CacheData ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t)) : nullptr; |
256 | } |
257 | |
258 | uint32_t IsoCodesCache::subdivisionHierachyMapSize() const |
259 | { |
260 | return m_iso3166_2CacheData |
261 | ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>))) |
262 | : 0; |
263 | } |
264 | |
265 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionParentMapBegin() const |
266 | { |
267 | return m_iso3166_2CacheData |
268 | ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 3 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>)) |
269 | : nullptr; |
270 | } |
271 | |
272 | const char *IsoCodesCache::subdivisionStringTableLookup(uint16_t offset) const |
273 | { |
274 | if (m_iso3166_2CacheData) { |
275 | const auto pos = offset + 3 * sizeof(uint32_t) + (subdivisionCount() + subdivisionHierachyMapSize()) * sizeof(MapEntry<uint32_t>); |
276 | return m_iso3166_2CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_2CacheData + pos) : nullptr; |
277 | } |
278 | return nullptr; |
279 | } |
280 | |
281 | void IsoCodesCache::createIso3166_2Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
282 | { |
283 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-2 cache" ; |
284 | QFile file(isoCodesPath); |
285 | if (!file.open(flags: QFile::ReadOnly)) { |
286 | qCWarning(KI18NLD) << "Unable to open iso_3166-2.json" << isoCodesPath << file.errorString(); |
287 | return; |
288 | } |
289 | |
290 | std::vector<MapEntry<uint32_t>> subdivNameMap; |
291 | std::vector<MapEntry<uint32_t>> subdivParentMap; |
292 | QByteArray iso3166_2stringTable; |
293 | |
294 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
295 | const auto array = doc.object().value(key: QLatin1String("3166-2" )).toArray(); |
296 | for (const auto &entryVal : array) { |
297 | const auto entry = entryVal.toObject(); |
298 | const auto key = IsoCodes::subdivisionCodeToKey(code: entry.value(key: QLatin1String("code" )).toString()); |
299 | |
300 | assert(std::numeric_limits<uint16_t>::max() > iso3166_2stringTable.size()); |
301 | subdivNameMap.push_back(x: {.key: key, .value: (uint16_t)iso3166_2stringTable.size()}); |
302 | iso3166_2stringTable.append(a: entry.value(key: QLatin1String("name" )).toString().toUtf8()); |
303 | iso3166_2stringTable.append(c: '\0'); |
304 | |
305 | const auto parentKey = IsoCodes::parentCodeToKey(code: entry.value(key: QLatin1String("parent" )).toString()); |
306 | if (parentKey) { |
307 | subdivParentMap.push_back(x: {.key: key, .value: parentKey}); |
308 | } |
309 | } |
310 | |
311 | std::sort(first: subdivNameMap.begin(), last: subdivNameMap.end()); |
312 | std::sort(first: subdivParentMap.begin(), last: subdivParentMap.end()); |
313 | |
314 | // write out binary cache file |
315 | QFile cache(cacheFilePath); |
316 | if (!cache.open(flags: QFile::WriteOnly)) { |
317 | qCWarning(KI18NLD) << "Failed to write ISO 3166-2 cache:" << cache.errorString() << cache.fileName(); |
318 | return; |
319 | } |
320 | |
321 | uint32_t n = Iso3166_2CacheHeader; |
322 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
323 | n = subdivNameMap.size(); |
324 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the name map |
325 | for (auto entry : subdivNameMap) { |
326 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
327 | } |
328 | n = subdivParentMap.size(); |
329 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the hierarchy map |
330 | for (auto entry : subdivParentMap) { |
331 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
332 | } |
333 | cache.write(data: iso3166_2stringTable); |
334 | } |
335 | |