| 1 | /* |
| 2 | SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org> |
| 3 | |
| 4 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 5 | */ |
| 6 | |
| 7 | #include "config-localedata.h" |
| 8 | |
| 9 | #include "isocodes_p.h" |
| 10 | #include "isocodescache_p.h" |
| 11 | #include "logging.h" |
| 12 | |
| 13 | #include <QDir> |
| 14 | #include <QFile> |
| 15 | #include <QFileInfo> |
| 16 | #include <QJsonArray> |
| 17 | #include <QJsonDocument> |
| 18 | #include <QJsonObject> |
| 19 | #include <QStandardPaths> |
| 20 | |
| 21 | using namespace Qt::Literals; |
| 22 | |
// Magic numbers identifying the binary cache file formats.
// Each value is written as the first 4 bytes of the corresponding cache file
// and compared against on load; a mismatch makes the cache unusable.
// Increment those when changing the format.
enum : uint32_t {
    Iso3166_1CacheHeader = 0x4B493102,
    Iso3166_2CacheHeader = 0x4B493201,
};
| 28 | |
| 29 | static QString isoCodesPath(QStringView file) |
| 30 | { |
| 31 | #ifndef Q_OS_ANDROID |
| 32 | auto path = QStandardPaths::locate(type: QStandardPaths::GenericDataLocation, fileName: QLatin1String("iso-codes/json/" ) + file, options: QStandardPaths::LocateFile); |
| 33 | if (!path.isEmpty()) { |
| 34 | return path; |
| 35 | } |
| 36 | |
| 37 | // search manually in the compile-time determined prefix |
| 38 | // needed for example for non-installed Windows binaries to work, such as unit tests |
| 39 | for (const char *installLocation : {"/share" , "/bin/data" }) { |
| 40 | path = QLatin1String(ISO_CODES_PREFIX) + QLatin1String(installLocation) + QLatin1String("/iso-codes/json/" ) + file; |
| 41 | if (QFileInfo::exists(file: path)) { |
| 42 | return path; |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | return {}; |
| 47 | #else |
| 48 | return QLatin1String("assets:/share/iso-codes/json/" ) + file; |
| 49 | #endif |
| 50 | } |
| 51 | |
| 52 | static QString cachePath() |
| 53 | { |
| 54 | return QStandardPaths::writableLocation(type: QStandardPaths::GenericCacheLocation) + QLatin1String("/org.kde.ki18n/iso-codes/" ); |
| 55 | } |
| 56 | |
| 57 | static QString cacheFilePath(QStringView file) |
| 58 | { |
| 59 | return cachePath() + file; |
| 60 | } |
| 61 | |
// Registers the embedded cache resource, if the build provides one.
// Does nothing when HAVE_EMBEDDED_CACHE is not defined.
static void initResources()
{
#if defined(HAVE_EMBEDDED_CACHE)
    Q_INIT_RESOURCE(isocodescache);
#endif
}
| 68 | |
| 69 | IsoCodesCache::~IsoCodesCache() = default; |
| 70 | |
| 71 | IsoCodesCache *IsoCodesCache::instance() |
| 72 | { |
| 73 | static IsoCodesCache s_cache; |
| 74 | return &s_cache; |
| 75 | } |
| 76 | |
| 77 | void IsoCodesCache::loadIso3166_1() |
| 78 | { |
| 79 | if (!m_iso3166_1CacheData && !loadIso3166_1Cache()) { |
| 80 | QDir().mkpath(dirPath: cachePath()); |
| 81 | createIso3166_1Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-1.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-1" )); |
| 82 | loadIso3166_1Cache(); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | static std::unique_ptr<QFile> openCacheFile(QStringView cacheFileName, QStringView isoCodesFileName) |
| 87 | { |
| 88 | QFileInfo jsonFi(isoCodesPath(file: isoCodesFileName)); |
| 89 | if (!jsonFi.exists()) { // no source file means we can only use an embedded cache |
| 90 | initResources(); |
| 91 | auto f = std::make_unique<QFile>(args: QLatin1String(":/org.kde.ki18n/iso-codes/cache/" ) + cacheFileName); |
| 92 | if (!f->open(flags: QFile::ReadOnly) || f->size() < 8) { |
| 93 | return {}; |
| 94 | } |
| 95 | return f; |
| 96 | } |
| 97 | auto f = std::make_unique<QFile>(args: cacheFilePath(file: cacheFileName)); |
| 98 | if (!f->open(flags: QFile::ReadOnly) || f->fileTime(time: QFile::FileModificationTime) < jsonFi.lastModified() || f->size() < 8) { |
| 99 | return {}; |
| 100 | } |
| 101 | return f; |
| 102 | } |
| 103 | |
| 104 | bool IsoCodesCache::loadIso3166_1Cache() |
| 105 | { |
| 106 | auto f = openCacheFile(cacheFileName: u"iso_3166-1" , isoCodesFileName: u"iso_3166-1.json" ); |
| 107 | if (!f) { |
| 108 | return false; |
| 109 | } |
| 110 | m_iso3166_1CacheSize = f->size(); |
| 111 | |
| 112 | // validate cache file is usable |
| 113 | // header matches |
| 114 | const auto data = f->map(offset: 0, size: m_iso3166_1CacheSize); |
| 115 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_1CacheHeader) { |
| 116 | return false; |
| 117 | } |
| 118 | // lookup tables fit into the available size |
| 119 | const auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
| 120 | if (sizeof(Iso3166_1CacheHeader) + sizeof(size) + size * sizeof(MapEntry<uint16_t>) * 2 >= m_iso3166_1CacheSize) { |
| 121 | return false; |
| 122 | } |
| 123 | // string table is 0 terminated |
| 124 | if (data[m_iso3166_1CacheSize - 1] != '\0') { |
| 125 | return false; |
| 126 | } |
| 127 | |
| 128 | m_iso3166_1CacheFile = std::move(f); |
| 129 | m_iso3166_1CacheData = data; |
| 130 | return true; |
| 131 | } |
| 132 | |
| 133 | uint32_t IsoCodesCache::countryCount() const |
| 134 | { |
| 135 | return m_iso3166_1CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_1CacheData) + 1) : 0; |
| 136 | } |
| 137 | |
| 138 | const MapEntry<uint16_t> *IsoCodesCache::countryNameMapBegin() const |
| 139 | { |
| 140 | return m_iso3166_1CacheData ? reinterpret_cast<const MapEntry<uint16_t> *>(m_iso3166_1CacheData + sizeof(uint32_t) * 2) : nullptr; |
| 141 | } |
| 142 | |
| 143 | const MapEntry<uint16_t> *IsoCodesCache::countryAlpha3MapBegin() const |
| 144 | { |
| 145 | return m_iso3166_1CacheData ? countryNameMapBegin() + countryCount() : nullptr; |
| 146 | } |
| 147 | |
| 148 | const char *IsoCodesCache::countryStringTableLookup(uint16_t offset) const |
| 149 | { |
| 150 | if (m_iso3166_1CacheData) { |
| 151 | const auto pos = offset + 2 * sizeof(uint32_t) + 2 * countryCount() * sizeof(MapEntry<uint16_t>); |
| 152 | return m_iso3166_1CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_1CacheData + pos) : nullptr; |
| 153 | } |
| 154 | return nullptr; |
| 155 | } |
| 156 | |
| 157 | [[nodiscard]] static QByteArray nameForIso3166_1(const QJsonObject &entry) |
| 158 | { |
| 159 | if (const auto commonName = entry.value(key: "common_name"_L1 ).toString(); !commonName.isEmpty()) { |
| 160 | return commonName.toUtf8(); |
| 161 | } |
| 162 | return entry.value(key: "name"_L1 ).toString().toUtf8(); |
| 163 | } |
| 164 | |
| 165 | void IsoCodesCache::createIso3166_1Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
| 166 | { |
| 167 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-1 cache" ; |
| 168 | |
| 169 | QFile file(isoCodesPath); |
| 170 | if (!file.open(flags: QFile::ReadOnly)) { |
| 171 | qCWarning(KI18NLD) << "Unable to open iso_3166-1.json" << isoCodesPath << file.errorString(); |
| 172 | return; |
| 173 | } |
| 174 | |
| 175 | std::vector<MapEntry<uint16_t>> alpha2NameMap; |
| 176 | std::vector<MapEntry<uint16_t>> alpha3alpha2Map; |
| 177 | QByteArray iso3166_1stringTable; |
| 178 | |
| 179 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
| 180 | const auto array = doc.object().value(key: QLatin1String("3166-1" )).toArray(); |
| 181 | for (const auto &entryVal : array) { |
| 182 | const auto entry = entryVal.toObject(); |
| 183 | const auto alpha2 = entry.value(key: QLatin1String("alpha_2" )).toString(); |
| 184 | if (alpha2.size() != 2) { |
| 185 | continue; |
| 186 | } |
| 187 | const auto alpha2Key = IsoCodes::alpha2CodeToKey(code: alpha2); |
| 188 | |
| 189 | assert(std::numeric_limits<uint16_t>::max() > iso3166_1stringTable.size()); |
| 190 | alpha2NameMap.push_back(x: {.key: alpha2Key, .value: (uint16_t)iso3166_1stringTable.size()}); |
| 191 | iso3166_1stringTable.append(a: nameForIso3166_1(entry)); |
| 192 | iso3166_1stringTable.append(c: '\0'); |
| 193 | |
| 194 | const auto alpha3Key = IsoCodes::alpha3CodeToKey(code: entry.value(key: QLatin1String("alpha_3" )).toString()); |
| 195 | alpha3alpha2Map.push_back(x: {.key: alpha3Key, .value: alpha2Key}); |
| 196 | } |
| 197 | |
| 198 | std::sort(first: alpha2NameMap.begin(), last: alpha2NameMap.end()); |
| 199 | std::sort(first: alpha3alpha2Map.begin(), last: alpha3alpha2Map.end()); |
| 200 | |
| 201 | // write out binary cache file |
| 202 | QFile cache(cacheFilePath); |
| 203 | if (!cache.open(flags: QFile::WriteOnly)) { |
| 204 | qCWarning(KI18NLD) << "Failed to write ISO 3166-1 cache:" << cache.errorString() << cache.fileName(); |
| 205 | return; |
| 206 | } |
| 207 | |
| 208 | uint32_t n = Iso3166_1CacheHeader; |
| 209 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
| 210 | n = alpha2NameMap.size(); |
| 211 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size |
| 212 | for (auto entry : alpha2NameMap) { |
| 213 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
| 214 | } |
| 215 | for (auto entry : alpha3alpha2Map) { |
| 216 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
| 217 | } |
| 218 | cache.write(data: iso3166_1stringTable); |
| 219 | } |
| 220 | |
| 221 | void IsoCodesCache::loadIso3166_2() |
| 222 | { |
| 223 | if (!m_iso3166_2CacheData && !loadIso3166_2Cache()) { |
| 224 | QDir().mkpath(dirPath: cachePath()); |
| 225 | createIso3166_2Cache(isoCodesPath: isoCodesPath(file: u"iso_3166-2.json" ), cacheFilePath: cacheFilePath(file: u"iso_3166-2" )); |
| 226 | loadIso3166_2Cache(); |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | bool IsoCodesCache::loadIso3166_2Cache() |
| 231 | { |
| 232 | auto f = openCacheFile(cacheFileName: u"iso_3166-2" , isoCodesFileName: u"iso_3166-2.json" ); |
| 233 | if (!f) { |
| 234 | return false; |
| 235 | } |
| 236 | m_iso3166_2CacheSize = f->size(); |
| 237 | |
| 238 | // validate cache file is usable |
| 239 | // header matches |
| 240 | const auto data = f->map(offset: 0, size: m_iso3166_2CacheSize); |
| 241 | if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_2CacheHeader) { |
| 242 | return false; |
| 243 | } |
| 244 | // name lookup table fits into the available size |
| 245 | auto size = *(reinterpret_cast<const uint32_t *>(data) + 1); |
| 246 | auto offset = 3 * sizeof(uint32_t) + size * sizeof(MapEntry<uint32_t>); |
| 247 | if (offset >= m_iso3166_2CacheSize) { |
| 248 | return false; |
| 249 | } |
| 250 | // hierarchy map boundary check |
| 251 | size = *(reinterpret_cast<const uint32_t *>(data + offset) - 1); |
| 252 | offset += size * sizeof(MapEntry<uint32_t>); |
| 253 | if (offset >= m_iso3166_2CacheSize) { |
| 254 | return false; |
| 255 | } |
| 256 | // string table is 0 terminated |
| 257 | if (data[m_iso3166_2CacheSize - 1] != '\0') { |
| 258 | return false; |
| 259 | } |
| 260 | |
| 261 | m_iso3166_2CacheFile = std::move(f); |
| 262 | m_iso3166_2CacheData = data; |
| 263 | return true; |
| 264 | } |
| 265 | |
| 266 | uint32_t IsoCodesCache::subdivisionCount() const |
| 267 | { |
| 268 | return m_iso3166_2CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData) + 1) : 0; |
| 269 | } |
| 270 | |
| 271 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionNameMapBegin() const |
| 272 | { |
| 273 | return m_iso3166_2CacheData ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t)) : nullptr; |
| 274 | } |
| 275 | |
| 276 | uint32_t IsoCodesCache::subdivisionHierachyMapSize() const |
| 277 | { |
| 278 | return m_iso3166_2CacheData |
| 279 | ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>))) |
| 280 | : 0; |
| 281 | } |
| 282 | |
| 283 | const MapEntry<uint32_t> *IsoCodesCache::subdivisionParentMapBegin() const |
| 284 | { |
| 285 | return m_iso3166_2CacheData |
| 286 | ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 3 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>)) |
| 287 | : nullptr; |
| 288 | } |
| 289 | |
| 290 | const char *IsoCodesCache::subdivisionStringTableLookup(uint16_t offset) const |
| 291 | { |
| 292 | if (m_iso3166_2CacheData) { |
| 293 | const auto pos = offset + 3 * sizeof(uint32_t) + (subdivisionCount() + subdivisionHierachyMapSize()) * sizeof(MapEntry<uint32_t>); |
| 294 | return m_iso3166_2CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_2CacheData + pos) : nullptr; |
| 295 | } |
| 296 | return nullptr; |
| 297 | } |
| 298 | |
| 299 | void IsoCodesCache::createIso3166_2Cache(const QString &isoCodesPath, const QString &cacheFilePath) |
| 300 | { |
| 301 | qCDebug(KI18NLD) << "Rebuilding ISO 3166-2 cache" ; |
| 302 | QFile file(isoCodesPath); |
| 303 | if (!file.open(flags: QFile::ReadOnly)) { |
| 304 | qCWarning(KI18NLD) << "Unable to open iso_3166-2.json" << isoCodesPath << file.errorString(); |
| 305 | return; |
| 306 | } |
| 307 | |
| 308 | std::vector<MapEntry<uint32_t>> subdivNameMap; |
| 309 | std::vector<MapEntry<uint32_t>> subdivParentMap; |
| 310 | QByteArray iso3166_2stringTable; |
| 311 | |
| 312 | const auto doc = QJsonDocument::fromJson(json: file.readAll()); |
| 313 | const auto array = doc.object().value(key: QLatin1String("3166-2" )).toArray(); |
| 314 | for (const auto &entryVal : array) { |
| 315 | const auto entry = entryVal.toObject(); |
| 316 | const auto key = IsoCodes::subdivisionCodeToKey(code: entry.value(key: QLatin1String("code" )).toString()); |
| 317 | |
| 318 | assert(std::numeric_limits<uint16_t>::max() > iso3166_2stringTable.size()); |
| 319 | subdivNameMap.push_back(x: {.key: key, .value: (uint16_t)iso3166_2stringTable.size()}); |
| 320 | iso3166_2stringTable.append(a: entry.value(key: QLatin1String("name" )).toString().toUtf8()); |
| 321 | iso3166_2stringTable.append(c: '\0'); |
| 322 | |
| 323 | const auto parentKey = IsoCodes::parentCodeToKey(code: entry.value(key: QLatin1String("parent" )).toString()); |
| 324 | if (parentKey) { |
| 325 | subdivParentMap.push_back(x: {.key: key, .value: parentKey}); |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | std::sort(first: subdivNameMap.begin(), last: subdivNameMap.end()); |
| 330 | std::sort(first: subdivParentMap.begin(), last: subdivParentMap.end()); |
| 331 | |
| 332 | // write out binary cache file |
| 333 | QFile cache(cacheFilePath); |
| 334 | if (!cache.open(flags: QFile::WriteOnly)) { |
| 335 | qCWarning(KI18NLD) << "Failed to write ISO 3166-2 cache:" << cache.errorString() << cache.fileName(); |
| 336 | return; |
| 337 | } |
| 338 | |
| 339 | uint32_t n = Iso3166_2CacheHeader; |
| 340 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // header |
| 341 | n = subdivNameMap.size(); |
| 342 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the name map |
| 343 | for (auto entry : subdivNameMap) { |
| 344 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
| 345 | } |
| 346 | n = subdivParentMap.size(); |
| 347 | cache.write(data: reinterpret_cast<const char *>(&n), len: 4); // size of the hierarchy map |
| 348 | for (auto entry : subdivParentMap) { |
| 349 | cache.write(data: reinterpret_cast<const char *>(&entry), len: sizeof(entry)); |
| 350 | } |
| 351 | cache.write(data: iso3166_2stringTable); |
| 352 | } |
| 353 | |