1/*
2 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "kcountry.h"
8#include "isocodes_p.h"
9#include "isocodescache_p.h"
10#include "kcatalog_p.h"
11#include "klocalizedstring.h"
12#include "logging.h"
13#include "spatial_index_p.h"
14#include "timezonedata_p.h"
15
16#include <cstring>
17
18static_assert(sizeof(KCountry) == 2);
19
20KCountry::KCountry()
21 : d(0)
22{
23}
24
25KCountry::KCountry(const KCountry &) = default;
26KCountry::~KCountry() = default;
27
28KCountry &KCountry::operator=(const KCountry &) = default;
29
30bool KCountry::operator==(const KCountry &other) const
31{
32 return d == other.d;
33}
34
35bool KCountry::operator!=(const KCountry &other) const
36{
37 return d != other.d;
38}
39
40bool KCountry::isValid() const
41{
42 return d != 0;
43}
44
45QString KCountry::alpha2() const
46{
47 if (d == 0) {
48 return {};
49 }
50
51 QString code(2, QLatin1Char('\0'));
52 code[0] = QLatin1Char(d >> 8);
53 code[1] = QLatin1Char(d & 0xff);
54 return code;
55}
56
57QString KCountry::alpha3() const
58{
59 const auto cache = IsoCodesCache::instance();
60 const auto it = std::find_if(first: cache->countryAlpha3MapBegin(), last: cache->countryAlpha3MapEnd(), pred: [this](auto entry) {
61 return entry.value == d;
62 });
63 if (it != cache->countryAlpha3MapEnd()) {
64 uint16_t alpha3Key = (*it).key;
65 QString code(3, QLatin1Char('\0'));
66 code[2] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
67 alpha3Key /= IsoCodes::AlphaNumKeyFactor;
68 code[1] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
69 alpha3Key /= IsoCodes::AlphaNumKeyFactor;
70 code[0] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
71 return code;
72 }
73 return {};
74}
75
76QString KCountry::name() const
77{
78 if (d == 0) {
79 return {};
80 }
81
82 auto cache = IsoCodesCache::instance();
83 cache->loadIso3166_1();
84 const auto it = std::lower_bound(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), val: d);
85 if (it != cache->countryNameMapEnd() && (*it).key == d) {
86 return i18nd(domain: "iso_3166-1", text: cache->countryStringTableLookup(offset: (*it).value));
87 }
88 return {};
89}
90
91QString KCountry::emojiFlag() const
92{
93 if (d == 0) {
94 return {};
95 }
96
97 QString flag;
98 char flagA[] = "\xF0\x9F\x87\xA6";
99 flagA[3] = 0xA6 + ((d >> 8) - 'A');
100 flag += QString::fromUtf8(utf8: flagA);
101 flagA[3] = 0xA6 + ((d & 0xff) - 'A');
102 flag += QString::fromUtf8(utf8: flagA);
103 return flag;
104}
105
106QLocale::Country KCountry::country() const
107{
108 if (d == 0) {
109 return QLocale::AnyCountry;
110 }
111
112 return QLocale::codeToTerritory(territoryCode: alpha2());
113}
114
115QList<const char *> KCountry::timeZoneIds() const
116{
117 QList<const char *> tzs;
118 if (d == 0) {
119 return tzs;
120 }
121
122 const auto countryIt = std::lower_bound(first: TimezoneData::countryTimezoneMapBegin(), last: TimezoneData::countryTimezoneMapEnd(), val: d);
123 if (countryIt != TimezoneData::countryTimezoneMapEnd() && (*countryIt).key == d) {
124 tzs.push_back(t: TimezoneData::ianaIdLookup(offset: (*countryIt).value));
125 return tzs;
126 }
127
128 const auto [subdivBegin, subdivEnd] =
129 std::equal_range(first: TimezoneData::subdivisionTimezoneMapBegin(), last: TimezoneData::subdivisionTimezoneMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
130 if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
131 return lhs < (rhs.key >> 16);
132 else
133 return (lhs.key >> 16) < rhs;
134 });
135 for (auto it = subdivBegin; it != subdivEnd; ++it) {
136 const auto tzId = TimezoneData::ianaIdLookup(offset: (*it).value);
137 if (!tzs.contains(t: tzId)) {
138 tzs.push_back(t: tzId);
139 }
140 }
141
142 return tzs;
143}
144
145QString KCountry::currencyCode() const
146{
147 if (d == 0) {
148 return {};
149 }
150
151 QString currency;
152 const auto ls = QLocale::matchingLocales(language: QLocale::AnyLanguage, script: QLocale::AnyScript, territory: country());
153 for (const auto &l : ls) {
154 if (currency.isEmpty()) {
155 currency = l.currencySymbol(QLocale::CurrencyIsoCode);
156 } else if (currency != l.currencySymbol(QLocale::CurrencyIsoCode)) {
157 qCDebug(KI18NLD) << "conflicting currency information in QLocale for" << alpha2();
158 return {};
159 }
160 }
161 return currency;
162}
163
164QList<KCountrySubdivision> KCountry::subdivisions() const
165{
166 if (d == 0) {
167 return {};
168 }
169
170 QList<KCountrySubdivision> l;
171 auto cache = IsoCodesCache::instance();
172 cache->loadIso3166_2();
173 // we don't have a country->subdivisions map, instead we use the full list of subdivisions
174 // (which is sorted by country due to the country being in the two most significant bytes of its key),
175 // and check the child->parent subdivision map for root elements
176 auto it = std::lower_bound(first: cache->subdivisionNameMapBegin(), last: cache->subdivisionNameMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
177 return (lhs.key >> 16) < rhs;
178 });
179
180 auto [parentBegin, parentEnd] = std::equal_range(first: cache->subdivisionParentMapBegin(), last: cache->subdivisionParentMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
181 if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
182 return lhs < (rhs.key >> 16);
183 else
184 return (lhs.key >> 16) < rhs;
185 });
186
187 for (; it != cache->subdivisionNameMapEnd() && ((*it).key >> 16) == d; ++it) {
188 if (!std::binary_search(first: parentBegin, last: parentEnd, val: (*it).key)) {
189 KCountrySubdivision s;
190 s.d = (*it).key;
191 l.push_back(t: s);
192 }
193 }
194
195 return l;
196}
197
198static uint16_t validatedAlpha2Key(uint16_t alpha2Key)
199{
200 if (!alpha2Key) {
201 return 0;
202 }
203
204 auto cache = IsoCodesCache::instance();
205 cache->loadIso3166_1();
206 const auto it = std::lower_bound(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), val: alpha2Key);
207 if (it != cache->countryNameMapEnd() && (*it).key == alpha2Key) {
208 return alpha2Key;
209 }
210 return 0;
211}
212
213KCountry KCountry::fromAlpha2(QStringView alpha2Code)
214{
215 KCountry c;
216 c.d = validatedAlpha2Key(alpha2Key: IsoCodes::alpha2CodeToKey(code: alpha2Code));
217 return c;
218}
219
220KCountry KCountry::fromAlpha2(const char *alpha2Code)
221{
222 KCountry c;
223 if (!alpha2Code) {
224 return c;
225 }
226 c.d = validatedAlpha2Key(alpha2Key: IsoCodes::alpha2CodeToKey(code: alpha2Code, size: std::strlen(s: alpha2Code)));
227 return c;
228}
229
230static uint16_t alpha3Lookup(uint16_t alpha3Key)
231{
232 if (!alpha3Key) {
233 return 0;
234 }
235
236 auto cache = IsoCodesCache::instance();
237 cache->loadIso3166_1();
238 const auto it = std::lower_bound(first: cache->countryAlpha3MapBegin(), last: cache->countryAlpha3MapEnd(), val: alpha3Key);
239 if (it != cache->countryAlpha3MapEnd() && (*it).key == alpha3Key) {
240 return (*it).value;
241 }
242 return 0;
243}
244
245KCountry KCountry::fromAlpha3(QStringView alpha3Code)
246{
247 KCountry c;
248 c.d = alpha3Lookup(alpha3Key: IsoCodes::alpha3CodeToKey(code: alpha3Code));
249 return c;
250}
251
252KCountry KCountry::fromAlpha3(const char *alpha3Code)
253{
254 KCountry c;
255 if (!alpha3Code) {
256 return c;
257 }
258 c.d = alpha3Lookup(alpha3Key: IsoCodes::alpha3CodeToKey(code: alpha3Code, size: std::strlen(s: alpha3Code)));
259 return c;
260}
261
262KCountry KCountry::fromLocation(float latitude, float longitude)
263{
264 const auto entry = SpatialIndex::lookup(lat: latitude, lon: longitude);
265 KCountry c;
266 c.d = entry.m_subdiv >> 16;
267 return c;
268}
269
270KCountry KCountry::fromQLocale(QLocale::Country country)
271{
272 return fromAlpha2(alpha2Code: QLocale::territoryToCode(territory: country).data());
273}
274
275static QString normalizeCountryName(QStringView name)
276{
277 QString res;
278 res.reserve(asize: name.size());
279 for (const auto c : name) {
280 // the following needs to be done fairly fine-grained, as this can easily mess up scripts
281 // that rely on some non-letter characters to work
282 // all values used below were obtained by similar code in KContacts, which used to do
283 // a full offline pre-computation of this and checked for ambiguities introduced by too
284 // aggressive normalization
285 switch (c.category()) {
286 // strip decorative elements that don't contribute to identification (parenthesis, dashes, quotes, etc)
287 case QChar::Punctuation_Connector:
288 case QChar::Punctuation_Dash:
289 case QChar::Punctuation_Open:
290 case QChar::Punctuation_Close:
291 case QChar::Punctuation_InitialQuote:
292 case QChar::Punctuation_FinalQuote:
293 case QChar::Punctuation_Other:
294 continue;
295 default:
296 break;
297 }
298
299 if (c.isSpace()) {
300 continue;
301 }
302
303 // if the character has a canonical decomposition skip the combining diacritic markers following it
304 // this works particularly well for Latin, but messes up Hangul
305 if (c.script() != QChar::Script_Hangul && c.decompositionTag() == QChar::Canonical) {
306 res.push_back(c: c.decomposition().at(i: 0).toCaseFolded());
307 } else {
308 res.push_back(c: c.toCaseFolded());
309 }
310 }
311
312 return res;
313}
314
315static void checkSubstringMatch(QStringView lhs, QStringView rhs, uint16_t code, uint16_t &result)
316{
317 if (result == std::numeric_limits<uint16_t>::max() || result == code || rhs.isEmpty()) {
318 return;
319 }
320 const auto matches = lhs.startsWith(s: rhs) || rhs.startsWith(s: lhs) || lhs.endsWith(s: rhs) || rhs.endsWith(s: lhs);
321 if (!matches) {
322 return;
323 }
324 result = result == 0 ? code : std::numeric_limits<uint16_t>::max();
325}
326
327KCountry KCountry::fromName(QStringView name)
328{
329 if (name.isEmpty()) {
330 return {};
331 }
332 const auto normalizedName = normalizeCountryName(name);
333
334 auto cache = IsoCodesCache::instance();
335 cache->loadIso3166_1();
336
337 uint16_t substrMatch = 0;
338
339 // check untranslated names
340 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
341 const auto normalizedCountry = normalizeCountryName(name: QString::fromUtf8(utf8: cache->countryStringTableLookup(offset: (*it).value)));
342 if (normalizedName == normalizedCountry) {
343 KCountry c;
344 c.d = (*it).key;
345 return c;
346 }
347 checkSubstringMatch(lhs: normalizedName, rhs: normalizedCountry, code: (*it).key, result&: substrMatch);
348 }
349
350 // check translated names
351 const auto langs = KCatalog::availableCatalogLanguages(domain: "iso_3166-1");
352 for (const auto &lang : langs) {
353 const auto catalog = KCatalog("iso_3166-1", lang);
354 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
355 const auto normalizedCountry = normalizeCountryName(name: catalog.translate(msgid: cache->countryStringTableLookup(offset: (*it).value)));
356 if (normalizedName == normalizedCountry) {
357 KCountry c;
358 c.d = (*it).key;
359 return c;
360 }
361 checkSubstringMatch(lhs: normalizedName, rhs: normalizedCountry, code: (*it).key, result&: substrMatch);
362 }
363 }
364
365 // unique prefix/suffix match
366 if (substrMatch != std::numeric_limits<uint16_t>::max() && substrMatch != 0) {
367 KCountry c;
368 c.d = substrMatch;
369 return c;
370 }
371
372 // fallback to code lookups
373 if (normalizedName.size() == 3) {
374 return fromAlpha3(alpha3Code: normalizedName);
375 }
376 if (normalizedName.size() == 2) {
377 return fromAlpha2(alpha2Code: normalizedName);
378 }
379
380 return {};
381}
382
383QList<KCountry> KCountry::allCountries()
384{
385 QList<KCountry> l;
386 auto cache = IsoCodesCache::instance();
387 cache->loadIso3166_1();
388 l.reserve(asize: cache->countryCount());
389 std::transform(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), result: std::back_inserter(x&: l), unary_op: [](auto entry) {
390 KCountry c;
391 c.d = entry.key;
392 return c;
393 });
394 return l;
395}
396
397QStringList KCountry::timeZoneIdsStringList() const
398{
399 const auto tzIds = timeZoneIds();
400 QStringList l;
401 l.reserve(asize: tzIds.size());
402 std::transform(first: tzIds.begin(), last: tzIds.end(), result: std::back_inserter(x&: l), unary_op: [](const char *tzId) {
403 return QString::fromUtf8(utf8: tzId);
404 });
405 return l;
406}
407
408#include "moc_kcountry.cpp"
409

// source code of ki18n/src/localedata/kcountry.cpp