1 | /* |
2 | This file is part of the KDE libraries |
3 | |
4 | SPDX-FileCopyrightText: 1999 Lars Knoll <knoll@kde.org> |
5 | SPDX-FileCopyrightText: 2001, 2003, 2004, 2005, 2006 Nicolas GOUTTE <goutte@kde.org> |
6 | SPDX-FileCopyrightText: 2007 Nick Shaforostoff <shafff@ukr.net> |
7 | |
8 | SPDX-License-Identifier: LGPL-2.0-or-later |
9 | */ |
10 | #include "kcharsets.h" |
11 | #include "kcharsets_p.h" |
12 | #include "kcodecs_debug.h" |
13 | |
14 | #include <kentities.h> |
15 | |
16 | #include <QHash> |
17 | |
18 | #include <algorithm> |
19 | #include <assert.h> |
20 | |
21 | /* |
22 | * The encoding names (like "ISO 8859-1") in this list are user-visible, |
23 | * and should be mostly uppercase. |
24 | * Generate with generate_string_table.pl (located in kde-dev-scripts), |
25 | * input data: |
26 | ISO 8859-1 |
27 | i18n:Western European |
28 | ISO 8859-15 |
29 | i18n:Western European |
30 | ISO 8859-14 |
31 | i18n:Western European |
32 | cp 1252 |
33 | i18n:Western European |
34 | IBM850 |
35 | i18n:Western European |
36 | ISO 8859-2 |
37 | i18n:Central European |
38 | ISO 8859-3 |
39 | i18n:Central European |
40 | ISO 8859-4 |
41 | i18n:Baltic |
42 | ISO 8859-13 |
43 | i18n:Baltic |
44 | ISO 8859-16 |
45 | i18n:South-Eastern Europe |
46 | cp 1250 |
47 | i18n:Central European |
48 | cp 1254 |
49 | i18n:Turkish |
50 | cp 1257 |
51 | i18n:Baltic |
52 | KOI8-R |
53 | i18n:Cyrillic |
54 | ISO 8859-5 |
55 | i18n:Cyrillic |
56 | cp 1251 |
57 | i18n:Cyrillic |
58 | KOI8-U |
59 | i18n:Cyrillic |
60 | IBM866 |
61 | i18n:Cyrillic |
62 | Big5 |
63 | i18n:Chinese Traditional |
64 | Big5-HKSCS |
65 | i18n:Chinese Traditional |
66 | GB18030 |
67 | i18n:Chinese Simplified |
68 | GBK |
69 | i18n:Chinese Simplified |
70 | GB2312 |
71 | i18n:Chinese Simplified |
72 | EUC-KR |
73 | i18n:Korean |
74 | windows-949 |
75 | i18n:Korean |
76 | sjis |
77 | i18n:Japanese |
78 | ISO-2022-JP |
79 | i18n:Japanese |
80 | EUC-JP |
81 | i18n:Japanese |
82 | ISO 8859-7 |
83 | i18n:Greek |
84 | cp 1253 |
85 | i18n:Greek |
86 | ISO 8859-6 |
87 | i18n:Arabic |
88 | cp 1256 |
89 | i18n:Arabic |
90 | ISO 8859-8 |
91 | i18n:Hebrew |
92 | ISO 8859-8-I |
93 | i18n:Hebrew |
94 | cp 1255 |
95 | i18n:Hebrew |
96 | ISO 8859-9 |
97 | i18n:Turkish |
98 | TIS620 |
99 | i18n:Thai |
100 | ISO 8859-11 |
101 | i18n:Thai |
102 | UTF-8 |
103 | i18n:Unicode |
104 | UTF-16 |
105 | i18n:Unicode |
106 | utf7 |
107 | i18n:Unicode |
108 | ucs2 |
109 | i18n:Unicode |
110 | ISO 10646-UCS-2 |
111 | i18n:Unicode |
112 | windows-1258 |
113 | i18n:Other |
114 | IBM874 |
115 | i18n:Other |
116 | TSCII |
117 | i18n:Other |
118 | */ |
119 | /* |
120 | * Notes about the table: |
121 | * |
122 | * - The following entries were disabled and removed from the table: |
123 | ibm852 |
124 | i18n:Central European |
125 | pt 154 |
126 | i18n:Cyrillic // ### TODO "PT 154" seems to have been removed from Qt |
127 | * |
 * - ISO 8859-11 is the deprecated name of TIS-620
129 | * - utf7 is not in Qt |
130 | * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2" |
131 | * - windows-1258: TODO |
132 | * - IBM874: TODO |
133 | * - TSCII: TODO |
134 | */ |
135 | |
136 | /* |
137 | * This redefines the QT_TRANSLATE_NOOP3 macro provided by Qt to indicate that |
138 | * statically initialised text should be translated so that it expands to just |
139 | * the string that should be translated, making it possible to use it in the |
140 | * single string construct below. |
141 | */ |
// The three arguments are used below as (context, source text, disambiguation);
// only the source text (b) survives the expansion, so each use collapses to a
// plain string literal that can sit inside a concatenated string table.
#undef QT_TRANSLATE_NOOP3
#define QT_TRANSLATE_NOOP3(a, b, c) b
144 | |
145 | /* |
146 | * THE FOLLOWING CODE IS GENERATED. PLEASE DO NOT EDIT BY HAND. |
147 | * The script used was generate_string_table.pl which can be found in kde-dev-scripts. |
148 | * It was then edited to use QT_TRANSLATE_NOOP3 instead of I18N_NOOP. |
149 | */ |
150 | |
// String pool for the encoding -> script-description table: NUL-separated
// encoding names and (translatable) script descriptions, with each distinct
// description stored once. Entries are addressed by byte offset from
// language_for_encoding_indices; the final "\0" adds an empty string at the end.
static const char language_for_encoding_string[] =
    "ISO 8859-1\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Western European" , "@item Text character set" )"\0"
    "ISO 8859-15\0"
    "ISO 8859-14\0"
    "cp 1252\0"
    "IBM850\0"
    "ISO 8859-2\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Central European" , "@item Text character set" )"\0"
    "ISO 8859-3\0"
    "ISO 8859-4\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Baltic" , "@item Text character set" )"\0"
    "ISO 8859-13\0"
    "ISO 8859-16\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "South-Eastern Europe" , "@item Text character set" )"\0"
    "cp 1250\0"
    "cp 1254\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Turkish" , "@item Text character set" )"\0"
    "cp 1257\0"
    "KOI8-R\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Cyrillic" , "@item Text character set" )"\0"
    "ISO 8859-5\0"
    "cp 1251\0"
    "KOI8-U\0"
    "IBM866\0"
    "Big5\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Chinese Traditional" , "@item Text character set" )"\0"
    "Big5-HKSCS\0"
    "GB18030\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Chinese Simplified" , "@item Text character set" )"\0"
    "GBK\0"
    "GB2312\0"
    "EUC-KR\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Korean" , "@item Text character set" )"\0"
    "windows-949\0"
    "sjis\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Japanese" , "@item Text character set" )"\0"
    "ISO-2022-JP\0"
    "EUC-JP\0"
    "ISO 8859-7\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Greek" , "@item Text character set" )"\0"
    "cp 1253\0"
    "ISO 8859-6\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Arabic" , "@item Text character set" )"\0"
    "cp 1256\0"
    "ISO 8859-8\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Hebrew" , "@item Text character set" )"\0"
    "ISO 8859-8-I\0"
    "cp 1255\0"
    "ISO 8859-9\0"
    "TIS620\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Thai" , "@item Text character set" )"\0"
    "ISO 8859-11\0"
    "UTF-8\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Unicode" , "@item Text character set" )"\0"
    "UTF-16\0"
    "utf7\0"
    "ucs2\0"
    "ISO 10646-UCS-2\0"
    "windows-1258\0"
    QT_TRANSLATE_NOOP3("KCharsets" , "Other" , "@item Text character set" )"\0"
    "IBM874\0"
    "TSCII\0"
    "\0" ;
215 | |
// Flat list of offset pairs into language_for_encoding_string, terminated by -1.
// Each pair is (offset of encoding name, offset of its script description);
// several encodings share one description offset (e.g. 11 = "Western European").
static const int language_for_encoding_indices[] = {
    0, 11, 28, 11, 40, 11, 52, 11, 60, 11, 67, 78, 95, 78, 106, 117, 124, 117, 136, 148, 169, 78, 177, 185, 193, 117, 201, 208, 217, 208, 228,
    208, 236, 208, 243, 208, 250, 255, 275, 255, 286, 294, 313, 294, 317, 294, 324, 331, 338, 331, 350, 355, 364, 355, 376, 355, 383, 394, 400, 394, 408, 419,
    426, 419, 434, 445, 452, 445, 465, 445, 473, 185, 484, 491, 496, 491, 508, 514, 522, 514, 529, 514, 534, 514, 539, 514, 555, 568, 574, 568, 581, 568, -1};
220 | |
221 | /* |
222 | * GENERATED CODE ENDS HERE |
223 | */ |
224 | |
225 | /* |
226 | * defines some different names for codecs that are built into Qt. |
227 | * The names in this list must be lower-case. |
228 | * input data for generate_string_table.pl: |
229 | iso-ir-111 |
230 | koi8-r |
231 | koi unified |
232 | koi8-r |
233 | us-ascii |
234 | iso 8859-1 |
235 | usascii |
236 | iso 8859-1 |
237 | ascii |
238 | iso 8859-1 |
239 | unicode-1-1-utf-7 |
240 | utf-7 |
241 | ucs2 |
242 | iso-10646-ucs-2 |
243 | iso10646-1 |
244 | iso-10646-ucs-2 |
245 | gb18030.2000-1 |
246 | gb18030 |
247 | gb18030.2000-0 |
248 | gb18030 |
249 | gbk-0 |
250 | gbk |
251 | gb2312 |
252 | gbk |
253 | gb2312.1980-0 |
254 | gbk |
255 | big5-0 |
256 | big5 |
257 | euc-kr |
258 | euckr |
259 | cp 949 |
260 | windows-949 |
261 | euc-jp |
262 | eucjp |
263 | jisx0201.1976-0 |
264 | eucjp |
265 | jisx0208.1983-0 |
266 | eucjp |
267 | jisx0208.1990-0 |
268 | eucjp |
269 | jisx0208.1997-0 |
270 | eucjp |
271 | jisx0212.1990-0 |
272 | eucjp |
273 | jisx0213.2000-1 |
274 | eucjp |
275 | jisx0213.2000-2 |
276 | eucjp |
277 | shift_jis |
278 | sjis |
279 | shift-jis |
280 | sjis |
281 | sjis |
282 | sjis |
283 | iso-2022-jp |
284 | jis7 |
285 | windows850 |
286 | ibm850 |
287 | windows866 |
288 | ibm866 |
289 | windows-850 |
290 | ibm850 |
291 | windows-866 |
292 | ibm866 |
293 | cp-10000 |
294 | apple roman |
295 | thai-tis620 |
296 | iso 8859-11 |
297 | windows-874 |
298 | ibm874 |
299 | windows874 |
300 | ibm874 |
301 | cp-874 |
302 | ibm874 |
303 | ksc5601.1987-0 |
304 | euckr |
305 | ks_c_5601-1987 |
306 | euckr |
307 | mac-roman |
308 | apple roman |
309 | macintosh |
310 | apple roman |
311 | mac |
312 | apple roman |
313 | csiso2022jp |
314 | iso-2022-jp |
315 | */ |
316 | /* |
317 | * Notes about the table: |
318 | * - using ISO-8859-1 for ASCII is only an approximation (as you cannot test if a character is part of the set) |
319 | * - utf7 is not in Qt |
320 | * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2" |
321 | * - sjis: appears on the table for x-sjis |
322 | * - jis7: ISO-2022-JP is now the default name in Qt4 |
323 | * - cp-874: is it really needed? |
324 | * - mac-roman: appears on the table for x-mac-roman |
325 | * - csiso2022jp: See bug #77243 |
326 | */ |
327 | |
328 | /* |
329 | * THE FOLLOWING CODE IS GENERATED. PLEASE DO NOT EDIT BY HAND. |
330 | * The script used was generate_string_table.pl which can be found in kde-dev-scripts. |
331 | */ |
332 | |
// String pool for the alternative-codec-name table: NUL-separated lower-case
// names, each distinct string stored once and addressed by byte offset from
// builtin_indices. No function in the visible part of this file reads it.
static const char builtin_string[] =
    "iso-ir-111\0"
    "koi8-r\0"
    "koi unified\0"
    "us-ascii\0"
    "iso 8859-1\0"
    "usascii\0"
    "ascii\0"
    "unicode-1-1-utf-7\0"
    "utf-7\0"
    "ucs2\0"
    "iso-10646-ucs-2\0"
    "iso10646-1\0"
    "gb18030.2000-1\0"
    "gb18030\0"
    "gb18030.2000-0\0"
    "gbk-0\0"
    "gbk\0"
    "gb2312\0"
    "gb2312.1980-0\0"
    "big5-0\0"
    "big5\0"
    "euc-kr\0"
    "euckr\0"
    "cp 949\0"
    "windows-949\0"
    "euc-jp\0"
    "eucjp\0"
    "jisx0201.1976-0\0"
    "jisx0208.1983-0\0"
    "jisx0208.1990-0\0"
    "jisx0208.1997-0\0"
    "jisx0212.1990-0\0"
    "jisx0213.2000-1\0"
    "jisx0213.2000-2\0"
    "shift_jis\0"
    "sjis\0"
    "shift-jis\0"
    "iso-2022-jp\0"
    "jis7\0"
    "windows850\0"
    "ibm850\0"
    "windows866\0"
    "ibm866\0"
    "windows-850\0"
    "windows-866\0"
    "cp-10000\0"
    "apple roman\0"
    "thai-tis620\0"
    "iso 8859-11\0"
    "windows-874\0"
    "ibm874\0"
    "windows874\0"
    "cp-874\0"
    "ksc5601.1987-0\0"
    "ks_c_5601-1987\0"
    "mac-roman\0"
    "macintosh\0"
    "mac\0"
    "csiso2022jp\0"
    "\0" ;
394 | |
// Flat list of offset pairs into builtin_string, terminated by -1.
// Each pair is (offset of alternative name, offset of the name it maps to),
// in the same order as the input data listed in the comment further up.
static const int builtin_indices[] = {0, 11, 18, 11, 30, 39, 50, 39, 58, 39, 64, 82, 88, 93, 109, 93, 120, 135, 143, 135, 158, 164,
    168, 164, 175, 164, 189, 196, 201, 208, 214, 221, 233, 240, 246, 240, 262, 240, 278, 240, 294, 240, 310, 240,
    326, 240, 342, 240, 358, 368, 373, 368, 368, 368, 383, 395, 400, 411, 418, 429, 436, 411, 448, 429, 460, 469,
    481, 493, 505, 517, 524, 517, 535, 517, 542, 208, 557, 208, 572, 469, 582, 469, 592, 469, 596, 383, -1};
399 | |
400 | /* |
401 | * GENERATED CODE ENDS HERE |
402 | */ |
403 | |
404 | /* |
405 | * some last resort hints in case the charmap file couldn't be found. |
406 | * This gives at least a partial conversion and helps make things readable. |
407 | * |
408 | * the name used as input here is already converted to the more canonical |
409 | * name as defined in the aliases array. |
410 | * |
411 | * Input data: |
412 | cp1250 |
413 | iso-8859-2 |
414 | koi8-r |
415 | iso-8859-5 |
416 | koi8-u |
417 | koi8-r |
418 | pt 154 |
419 | windows-1251 |
420 | paratype-154 |
421 | windows-1251 |
422 | pt-154 |
423 | windows-1251 |
424 | */ |
425 | /* Notes: |
 * - KDE has always used "CP 1251" as the best fallback for PT 154. As Qt no longer offers that encoding, codepage 1251 is used as the fallback.
427 | */ |
428 | |
429 | /* |
430 | * THE FOLLOWING CODE IS GENERATED. PLEASE DO NOT EDIT BY HAND. |
431 | * The script used was generate_string_table.pl which can be found in kde-dev-scripts. |
432 | */ |
433 | |
// String pool for the last-resort conversion hints: NUL-separated encoding
// names, each distinct string stored once and addressed by byte offset from
// conversion_hints_indices. No function in the visible part of this file reads it.
static const char conversion_hints_string[] =
    "cp1250\0"
    "iso-8859-2\0"
    "koi8-r\0"
    "iso-8859-5\0"
    "koi8-u\0"
    "pt 154\0"
    "windows-1251\0"
    "paratype-154\0"
    "pt-154\0"
    "\0" ;
445 | |
// Offset pairs into conversion_hints_string, terminated by -1:
// (offset of encoding name, offset of the fallback encoding to try instead).
static const int conversion_hints_indices[] = {0, 7, 18, 25, 36, 18, 43, 50, 63, 50, 76, 50, -1};
447 | |
448 | /* |
449 | * GENERATED CODE ENDS HERE |
450 | */ |
451 | |
// Holder for the shared KCharsets object; this is the type instantiated by the
// Q_GLOBAL_STATIC below.
struct KCharsetsSingletonPrivate {
    KCharsets instance; // the object whose address KCharsets::charsets() returns
};

// Defines the accessor globalCharsets() for the holder above.
// NOTE(review): per Qt's Q_GLOBAL_STATIC documentation the object is created on
// first access rather than at load time - confirm if initialisation order matters.
Q_GLOBAL_STATIC(KCharsetsSingletonPrivate, globalCharsets)
457 | |
458 | // search an array of items index/data, find first matching index |
459 | // and return data, or return 0 |
460 | static inline const char *kcharsets_array_search(const char *start, const int *indices, const char *entry) |
461 | { |
462 | for (int i = 0; indices[i] != -1; i += 2) { |
463 | if (qstrcmp(str1: start + indices[i], str2: entry) == 0) { |
464 | return start + indices[i + 1]; |
465 | } |
466 | } |
467 | return nullptr; |
468 | } |
469 | |
470 | // -------------------------------------------------------------------------- |
471 | |
// Creates the object together with a fresh KCharsetsPrivate, which holds the
// lazily built encodingsByScript cache.
KCharsets::KCharsets()
    : d(new KCharsetsPrivate)
{
}

// Defaulted destructor: releasing the private data is left to the destructor
// of d. NOTE(review): presumably d is an owning smart pointer declared in the
// header - confirm there.
KCharsets::~KCharsets() = default;
478 | |
479 | QChar KCharsets::fromEntity(QStringView str) |
480 | { |
481 | QChar res = QChar::Null; |
482 | |
483 | if (str.isEmpty()) { |
484 | return QChar::Null; |
485 | } |
486 | |
487 | int pos = 0; |
488 | if (str[pos] == QLatin1Char('&')) { |
489 | pos++; |
490 | } |
491 | |
492 | // Check for '�' or '�' sequence |
493 | if (str[pos] == QLatin1Char('#') && str.length() - pos > 1) { |
494 | bool ok; |
495 | pos++; |
496 | if (str[pos] == QLatin1Char('x') || str[pos] == QLatin1Char('X')) { |
497 | pos++; |
498 | // '�', hexadecimal character reference |
499 | const auto tmp = str.mid(pos); |
500 | res = QChar(tmp.toInt(ok: &ok, base: 16)); |
501 | } else { |
502 | // '�', decimal character reference |
503 | const auto tmp = str.mid(pos); |
504 | res = QChar(tmp.toInt(ok: &ok, base: 10)); |
505 | } |
506 | if (ok) { |
507 | return res; |
508 | } else { |
509 | return QChar::Null; |
510 | } |
511 | } |
512 | |
513 | const QByteArray raw(str.toLatin1()); |
514 | const entity *e = KCodecsEntities::kde_findEntity(str: raw.data(), len: raw.length()); |
515 | |
516 | if (!e) { |
517 | // qCDebug(KCODECS_LOG) << "unknown entity " << str <<", len = " << str.length(); |
518 | return QChar::Null; |
519 | } |
520 | // qCDebug(KCODECS_LOG) << "got entity " << str << " = " << e->code; |
521 | |
522 | return QChar(e->code); |
523 | } |
524 | |
525 | QChar KCharsets::fromEntity(QStringView str, int &len) |
526 | { |
527 | // entities are never longer than 8 chars... we start from |
528 | // that length and work backwards... |
529 | len = 8; |
530 | while (len > 0) { |
531 | const auto tmp = str.left(n: len); |
532 | QChar res = fromEntity(str: tmp); |
533 | if (res != QChar::Null) { |
534 | return res; |
535 | } |
536 | len--; |
537 | } |
538 | return QChar::Null; |
539 | } |
540 | |
541 | QString KCharsets::toEntity(const QChar &ch) |
542 | { |
543 | return QString::asprintf(format: "�x%x;" , ch.unicode()); |
544 | } |
545 | |
546 | QString KCharsets::resolveEntities(const QString &input) |
547 | { |
548 | QString text = input; |
549 | const QChar *p = text.unicode(); |
550 | const QChar *end = p + text.length(); |
551 | const QChar *ampersand = nullptr; |
552 | bool scanForSemicolon = false; |
553 | |
554 | for (; p < end; ++p) { |
555 | const QChar ch = *p; |
556 | |
557 | if (ch == QLatin1Char('&')) { |
558 | ampersand = p; |
559 | scanForSemicolon = true; |
560 | continue; |
561 | } |
562 | |
563 | if (ch != QLatin1Char(';') || scanForSemicolon == false) { |
564 | continue; |
565 | } |
566 | |
567 | assert(ampersand); |
568 | |
569 | scanForSemicolon = false; |
570 | |
571 | const QChar *entityBegin = ampersand + 1; |
572 | |
573 | const uint entityLength = p - entityBegin; |
574 | if (entityLength == 0) { |
575 | continue; |
576 | } |
577 | |
578 | const QChar entityValue = KCharsets::fromEntity(str: QStringView(entityBegin, entityLength)); |
579 | if (entityValue.isNull()) { |
580 | continue; |
581 | } |
582 | |
583 | const uint ampersandPos = ampersand - text.unicode(); |
584 | |
585 | text[(int)ampersandPos] = entityValue; |
586 | text.remove(i: ampersandPos + 1, len: entityLength + 1); |
587 | p = text.unicode() + ampersandPos; |
588 | end = text.unicode() + text.length(); |
589 | ampersand = nullptr; |
590 | } |
591 | |
592 | return text; |
593 | } |
594 | |
595 | QStringList KCharsets::availableEncodingNames() const |
596 | { |
597 | QStringList available; |
598 | for (const int *p = language_for_encoding_indices; *p != -1; p += 2) { |
599 | available.append(t: QString::fromUtf8(utf8: language_for_encoding_string + *p)); |
600 | } |
601 | available.sort(); |
602 | return available; |
603 | } |
604 | |
605 | QString KCharsets::descriptionForEncoding(QStringView encoding) const |
606 | { |
607 | const char *lang = kcharsets_array_search(start: language_for_encoding_string, indices: language_for_encoding_indices, entry: encoding.toUtf8().data()); |
608 | if (lang) { |
609 | return tr(sourceText: "%1 ( %2 )" , disambiguation: "@item %1 character set, %2 encoding" ).arg(args: tr(sourceText: lang, disambiguation: "@item Text character set" ), args&: encoding); |
610 | } else { |
611 | return tr(sourceText: "Other encoding (%1)" , disambiguation: "@item" ).arg(a: encoding); |
612 | } |
613 | } |
614 | |
615 | QString KCharsets::encodingForName(const QString &descriptiveName) const |
616 | { |
617 | const int left = descriptiveName.lastIndexOf(c: QLatin1Char('(')); |
618 | |
619 | if (left < 0) { // No parenthesis, so assume it is a normal encoding name |
620 | return descriptiveName.trimmed(); |
621 | } |
622 | |
623 | QString name(descriptiveName.mid(position: left + 1)); |
624 | |
625 | const int right = name.lastIndexOf(c: QLatin1Char(')')); |
626 | |
627 | if (right < 0) { |
628 | return name; |
629 | } |
630 | |
631 | return name.left(n: right).trimmed(); |
632 | } |
633 | |
634 | QStringList KCharsets::descriptiveEncodingNames() const |
635 | { |
636 | QStringList encodings; |
637 | for (const int *p = language_for_encoding_indices; *p != -1; p += 2) { |
638 | const QString name = QString::fromUtf8(utf8: language_for_encoding_string + p[0]); |
639 | const QString description = tr(sourceText: language_for_encoding_string + p[1], disambiguation: "@item Text character set" ); |
640 | encodings.append(t: tr(sourceText: "%1 ( %2 )" , disambiguation: "@item Text encoding: %1 character set, %2 encoding" ).arg(args: description, args: name)); |
641 | } |
642 | encodings.sort(); |
643 | return encodings; |
644 | } |
645 | |
646 | QList<QStringList> KCharsets::encodingsByScript() const |
647 | { |
648 | if (!d->encodingsByScript.isEmpty()) { |
649 | return d->encodingsByScript; |
650 | } |
651 | int i; |
652 | for (const int *p = language_for_encoding_indices; *p != -1; p += 2) { |
653 | const QString name = QString::fromUtf8(utf8: language_for_encoding_string + p[0]); |
654 | const QString description = tr(sourceText: language_for_encoding_string + p[1], disambiguation: "@item Text character set" ); |
655 | |
656 | for (i = 0; i < d->encodingsByScript.size(); ++i) { |
657 | if (d->encodingsByScript.at(i).at(i: 0) == description) { |
658 | d->encodingsByScript[i].append(t: name); |
659 | break; |
660 | } |
661 | } |
662 | |
663 | if (i == d->encodingsByScript.size()) { |
664 | d->encodingsByScript.append(t: QStringList() << description << name); |
665 | } |
666 | } |
667 | return d->encodingsByScript; |
668 | } |
669 | |
670 | KCharsets *KCharsets::charsets() |
671 | { |
672 | return &globalCharsets()->instance; |
673 | } |
674 | |