/*
    SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
6
7#include "kcountry.h"
8#include "isocodes_p.h"
9#include "isocodescache_p.h"
10#include "kcatalog_p.h"
11#include "klocalizedstring.h"
12#include "logging.h"
13#include "spatial_index_p.h"
14#include "timezonedata_p.h"
15
16#include <cstring>
17
18using namespace Qt::Literals;
19
20static_assert(sizeof(KCountry) == 2);
21
22KCountry::KCountry()
23 : d(0)
24{
25}
26
27KCountry::KCountry(const KCountry &) = default;
28KCountry::~KCountry() = default;
29
30KCountry &KCountry::operator=(const KCountry &) = default;
31
32bool KCountry::operator==(const KCountry &other) const
33{
34 return d == other.d;
35}
36
37bool KCountry::operator!=(const KCountry &other) const
38{
39 return d != other.d;
40}
41
42bool KCountry::isValid() const
43{
44 return d != 0;
45}
46
47QString KCountry::alpha2() const
48{
49 if (d == 0) {
50 return {};
51 }
52
53 QString code(2, QLatin1Char('\0'));
54 code[0] = QLatin1Char(d >> 8);
55 code[1] = QLatin1Char(d & 0xff);
56 return code;
57}
58
59QString KCountry::alpha3() const
60{
61 const auto cache = IsoCodesCache::instance();
62 const auto it = std::find_if(first: cache->countryAlpha3MapBegin(), last: cache->countryAlpha3MapEnd(), pred: [this](auto entry) {
63 return entry.value == d;
64 });
65 if (it != cache->countryAlpha3MapEnd()) {
66 uint16_t alpha3Key = (*it).key;
67 QString code(3, QLatin1Char('\0'));
68 code[2] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
69 alpha3Key /= IsoCodes::AlphaNumKeyFactor;
70 code[1] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
71 alpha3Key /= IsoCodes::AlphaNumKeyFactor;
72 code[0] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(key: alpha3Key));
73 return code;
74 }
75 return {};
76}
77
78QString KCountry::name() const
79{
80 if (d == 0) {
81 return {};
82 }
83
84 auto cache = IsoCodesCache::instance();
85 cache->loadIso3166_1();
86 const auto it = std::lower_bound(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), val: d);
87 if (it != cache->countryNameMapEnd() && (*it).key == d) {
88 return i18nd(domain: "iso_3166-1", text: cache->countryStringTableLookup(offset: (*it).value));
89 }
90 return {};
91}
92
93QString KCountry::emojiFlag() const
94{
95 if (d == 0) {
96 return {};
97 }
98
99 QString flag;
100 char flagA[] = "\xF0\x9F\x87\xA6";
101 flagA[3] = 0xA6 + ((d >> 8) - 'A');
102 flag += QString::fromUtf8(utf8: flagA);
103 flagA[3] = 0xA6 + ((d & 0xff) - 'A');
104 flag += QString::fromUtf8(utf8: flagA);
105 return flag;
106}
107
108QLocale::Country KCountry::country() const
109{
110 if (d == 0) {
111 return QLocale::AnyCountry;
112 }
113
114 return QLocale::codeToTerritory(territoryCode: alpha2());
115}
116
117QList<const char *> KCountry::timeZoneIds() const
118{
119 QList<const char *> tzs;
120 if (d == 0) {
121 return tzs;
122 }
123
124 const auto countryIt = std::lower_bound(first: TimezoneData::countryTimezoneMapBegin(), last: TimezoneData::countryTimezoneMapEnd(), val: d);
125 if (countryIt != TimezoneData::countryTimezoneMapEnd() && (*countryIt).key == d) {
126 tzs.push_back(t: TimezoneData::ianaIdLookup(offset: (*countryIt).value));
127 return tzs;
128 }
129
130 const auto [subdivBegin, subdivEnd] =
131 std::equal_range(first: TimezoneData::subdivisionTimezoneMapBegin(), last: TimezoneData::subdivisionTimezoneMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
132 if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
133 return lhs < (rhs.key >> 16);
134 else
135 return (lhs.key >> 16) < rhs;
136 });
137 for (auto it = subdivBegin; it != subdivEnd; ++it) {
138 const auto tzId = TimezoneData::ianaIdLookup(offset: (*it).value);
139 if (!tzs.contains(t: tzId)) {
140 tzs.push_back(t: tzId);
141 }
142 }
143
144 return tzs;
145}
146
147QString KCountry::currencyCode() const
148{
149 if (d == 0) {
150 return {};
151 }
152
153 QString currency;
154 const auto ls = QLocale::matchingLocales(language: QLocale::AnyLanguage, script: QLocale::AnyScript, territory: country());
155 for (const auto &l : ls) {
156 if (currency.isEmpty()) {
157 currency = l.currencySymbol(QLocale::CurrencyIsoCode);
158 } else if (currency != l.currencySymbol(QLocale::CurrencyIsoCode)) {
159 qCDebug(KI18NLD) << "conflicting currency information in QLocale for" << alpha2();
160 return {};
161 }
162 }
163 return currency;
164}
165
166QList<KCountrySubdivision> KCountry::subdivisions() const
167{
168 if (d == 0) {
169 return {};
170 }
171
172 QList<KCountrySubdivision> l;
173 auto cache = IsoCodesCache::instance();
174 cache->loadIso3166_2();
175 // we don't have a country->subdivisions map, instead we use the full list of subdivisions
176 // (which is sorted by country due to the country being in the two most significant bytes of its key),
177 // and check the child->parent subdivision map for root elements
178 auto it = std::lower_bound(first: cache->subdivisionNameMapBegin(), last: cache->subdivisionNameMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
179 return (lhs.key >> 16) < rhs;
180 });
181
182 auto [parentBegin, parentEnd] = std::equal_range(first: cache->subdivisionParentMapBegin(), last: cache->subdivisionParentMapEnd(), val: d, comp: [](auto lhs, auto rhs) {
183 if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
184 return lhs < (rhs.key >> 16);
185 else
186 return (lhs.key >> 16) < rhs;
187 });
188
189 for (; it != cache->subdivisionNameMapEnd() && ((*it).key >> 16) == d; ++it) {
190 if (!std::binary_search(first: parentBegin, last: parentEnd, val: (*it).key)) {
191 KCountrySubdivision s;
192 s.d = (*it).key;
193 l.push_back(t: s);
194 }
195 }
196
197 return l;
198}
199
200static uint16_t validatedAlpha2Key(uint16_t alpha2Key)
201{
202 if (!alpha2Key) {
203 return 0;
204 }
205
206 auto cache = IsoCodesCache::instance();
207 cache->loadIso3166_1();
208 const auto it = std::lower_bound(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), val: alpha2Key);
209 if (it != cache->countryNameMapEnd() && (*it).key == alpha2Key) {
210 return alpha2Key;
211 }
212 return 0;
213}
214
215KCountry KCountry::fromAlpha2(QStringView alpha2Code)
216{
217 KCountry c;
218 c.d = validatedAlpha2Key(alpha2Key: IsoCodes::alpha2CodeToKey(code: alpha2Code));
219 return c;
220}
221
222KCountry KCountry::fromAlpha2(const char *alpha2Code)
223{
224 KCountry c;
225 if (!alpha2Code) {
226 return c;
227 }
228 c.d = validatedAlpha2Key(alpha2Key: IsoCodes::alpha2CodeToKey(code: alpha2Code, size: std::strlen(s: alpha2Code)));
229 return c;
230}
231
232static uint16_t alpha3Lookup(uint16_t alpha3Key)
233{
234 if (!alpha3Key) {
235 return 0;
236 }
237
238 auto cache = IsoCodesCache::instance();
239 cache->loadIso3166_1();
240 const auto it = std::lower_bound(first: cache->countryAlpha3MapBegin(), last: cache->countryAlpha3MapEnd(), val: alpha3Key);
241 if (it != cache->countryAlpha3MapEnd() && (*it).key == alpha3Key) {
242 return (*it).value;
243 }
244 return 0;
245}
246
247KCountry KCountry::fromAlpha3(QStringView alpha3Code)
248{
249 KCountry c;
250 c.d = alpha3Lookup(alpha3Key: IsoCodes::alpha3CodeToKey(code: alpha3Code));
251 return c;
252}
253
254KCountry KCountry::fromAlpha3(const char *alpha3Code)
255{
256 KCountry c;
257 if (!alpha3Code) {
258 return c;
259 }
260 c.d = alpha3Lookup(alpha3Key: IsoCodes::alpha3CodeToKey(code: alpha3Code, size: std::strlen(s: alpha3Code)));
261 return c;
262}
263
264KCountry KCountry::fromLocation(float latitude, float longitude)
265{
266 const auto entry = SpatialIndex::lookup(lat: latitude, lon: longitude);
267 KCountry c;
268 c.d = entry.m_subdiv >> 16;
269 return c;
270}
271
272KCountry KCountry::fromQLocale(QLocale::Country country)
273{
274 const QString territoryCode = QLocale::territoryToCode(territory: country);
275 return fromAlpha2(alpha2Code: territoryCode);
276}
277
278static QString normalizeCountryName(QStringView name)
279{
280 QString res;
281 res.reserve(asize: name.size());
282 for (const auto c : name) {
283 // the following needs to be done fairly fine-grained, as this can easily mess up scripts
284 // that rely on some non-letter characters to work
285 // all values used below were obtained by similar code in KContacts, which used to do
286 // a full offline pre-computation of this and checked for ambiguities introduced by too
287 // aggressive normalization
288 switch (c.category()) {
289 // strip decorative elements that don't contribute to identification (parenthesis, dashes, quotes, etc)
290 case QChar::Punctuation_Connector:
291 case QChar::Punctuation_Dash:
292 case QChar::Punctuation_Open:
293 case QChar::Punctuation_Close:
294 case QChar::Punctuation_InitialQuote:
295 case QChar::Punctuation_FinalQuote:
296 case QChar::Punctuation_Other:
297 continue;
298 default:
299 break;
300 }
301
302 if (c.isSpace()) {
303 if (!res.isEmpty() && !res.back().isSpace()) {
304 res.push_back(c: ' '_L1);
305 }
306 continue;
307 }
308
309 // if the character has a canonical decomposition skip the combining diacritic markers following it
310 // this works particularly well for Latin, but messes up Hangul
311 if (c.script() != QChar::Script_Hangul && c.decompositionTag() == QChar::Canonical) {
312 res.push_back(c: c.decomposition().at(i: 0).toCaseFolded());
313 } else {
314 res.push_back(c: c.toCaseFolded());
315 }
316 }
317
318 return res.trimmed();
319}
320
321// check is @p needle is a space-separated substring of haystack
322static bool isSeparatedSubstring(QStringView haystack, QStringView needle)
323{
324 auto idx = haystack.indexOf(s: needle);
325 if (idx < 0) {
326 return false;
327 }
328 if (idx > 0 && !haystack[idx - 1].isSpace()) {
329 return false;
330 }
331 idx += needle.size();
332 return idx >= haystack.size() || haystack[idx].isSpace();
333}
334
335static void checkSubstringMatch(QStringView lhs, QStringView rhs, uint16_t code, uint16_t &result)
336{
337 if (result == std::numeric_limits<uint16_t>::max() || result == code || rhs.isEmpty()) {
338 return;
339 }
340 const auto matches = isSeparatedSubstring(haystack: lhs, needle: rhs) || isSeparatedSubstring(haystack: rhs, needle: lhs);
341
342 if (!matches) {
343 return;
344 }
345 result = result == 0 ? code : std::numeric_limits<uint16_t>::max();
346}
347
348KCountry KCountry::fromName(QStringView name)
349{
350 if (name.isEmpty()) {
351 return {};
352 }
353 const auto normalizedName = normalizeCountryName(name);
354
355 auto cache = IsoCodesCache::instance();
356 cache->loadIso3166_1();
357
358 uint16_t substrMatch = 0;
359
360 // check untranslated names
361 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
362 const auto normalizedCountry = normalizeCountryName(name: QString::fromUtf8(utf8: cache->countryStringTableLookup(offset: (*it).value)));
363 if (normalizedName == normalizedCountry) {
364 KCountry c;
365 c.d = (*it).key;
366 return c;
367 }
368 checkSubstringMatch(lhs: normalizedName, rhs: normalizedCountry, code: (*it).key, result&: substrMatch);
369 }
370
371 // check translated names
372 const auto langs = KCatalog::availableCatalogLanguages(domain: "iso_3166-1");
373 for (const auto &lang : langs) {
374 const auto catalog = KCatalog("iso_3166-1", lang);
375 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
376 const auto normalizedCountry = normalizeCountryName(name: catalog.translate(msgid: cache->countryStringTableLookup(offset: (*it).value)));
377 if (normalizedName == normalizedCountry) {
378 KCountry c;
379 c.d = (*it).key;
380 return c;
381 }
382 checkSubstringMatch(lhs: normalizedName, rhs: normalizedCountry, code: (*it).key, result&: substrMatch);
383 }
384 }
385
386 // unique prefix/suffix match
387 if (substrMatch != std::numeric_limits<uint16_t>::max() && substrMatch != 0) {
388 KCountry c;
389 c.d = substrMatch;
390 return c;
391 }
392
393 // fallback to code lookups
394 if (normalizedName.size() == 3) {
395 return fromAlpha3(alpha3Code: normalizedName);
396 }
397 if (normalizedName.size() == 2) {
398 return fromAlpha2(alpha2Code: normalizedName);
399 }
400
401 return {};
402}
403
404QList<KCountry> KCountry::allCountries()
405{
406 QList<KCountry> l;
407 auto cache = IsoCodesCache::instance();
408 cache->loadIso3166_1();
409 l.reserve(asize: cache->countryCount());
410 std::transform(first: cache->countryNameMapBegin(), last: cache->countryNameMapEnd(), result: std::back_inserter(x&: l), unary_op: [](auto entry) {
411 KCountry c;
412 c.d = entry.key;
413 return c;
414 });
415 return l;
416}
417
418QStringList KCountry::timeZoneIdsStringList() const
419{
420 const auto tzIds = timeZoneIds();
421 QStringList l;
422 l.reserve(asize: tzIds.size());
423 std::transform(first: tzIds.begin(), last: tzIds.end(), result: std::back_inserter(x&: l), unary_op: [](const char *tzId) {
424 return QString::fromUtf8(utf8: tzId);
425 });
426 return l;
427}
428
429#include "moc_kcountry.cpp"
430

// source code of ki18n/src/localedata/kcountry.cpp