qicucodec.cpp source code [qtbase/src/corelib/codecs/qicucodec.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 The Qt Company Ltd.
4	** Contact: https://www.qt.io/licensing/
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and The Qt Company. For licensing terms
14	** and conditions see https://www.qt.io/terms-conditions. For further
15	** information use the contact form at https://www.qt.io/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 3 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL3 included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 3 requirements
23	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24	**
25	** GNU General Public License Usage
26	** Alternatively, this file may be used under the terms of the GNU
27	** General Public License version 2.0 or (at your option) the GNU General
28	** Public license version 3 or any later version approved by the KDE Free
29	** Qt Foundation. The licenses are as published by the Free Software
30	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31	** included in the packaging of this file. Please review the following
32	** information to ensure the GNU General Public License requirements will
33	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34	** https://www.gnu.org/licenses/gpl-3.0.html.
35	**
36	** $QT_END_LICENSE$
37	**
38	****************************************************************************/
39
40	#include "qicucodec_p.h"
41
42	#include "qtextcodec_p.h"
43	#include "qutfcodec_p.h"
44	#include "qlatincodec_p.h"
45	#include "qsimplecodec_p.h"
46	#include "private/qcoreglobaldata_p.h"
47	#include "qdebug.h"
48
49	#include "unicode/ucnv.h"
50
51	#if QT_CONFIG(codecs)
52	#include "qtsciicodec_p.h"
53	#include "qisciicodec_p.h"
54	#endif
55
56	QT_BEGIN_NAMESPACE
57
58	typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt;
59	typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt;
60
61	static void qIcuCodecStateFree(QTextCodec::ConverterState *state)
62	{
63	ucnv_close(converter: static_cast<UConverter *>(state->d));
64	}
65
66	bool qTextCodecNameMatch(const char n, const* char *h)
67	{
68	return ucnv_compareNames(name1: n, name2: h) == `0`;
69	}
70
71	/* The list below is generated from http://www.iana.org/assignments/character-sets/
72	using the snippet of code below:
73
74	#include <QtCore>
75	#include <unicode/ucnv.h>
76
77	int main(int argc, char **argv)
78	{
79	QCoreApplication app(argc, argv);
80
81	QFile file("character-sets.txt");
82	file.open(QFile::ReadOnly);
83	QByteArray name;
84	int mib = -1;
85	QByteArray nameList;
86	int pos = 0;
87	while (!file.atEnd()) {
88	QByteArray s = file.readLine().trimmed();
89	if (s.isEmpty()) {
90	if (mib != -1) {
91	UErrorCode error = U_ZERO_ERROR;
92	const char *standard_name = ucnv_getStandardName(name, "MIME", &error);
93	if (U_FAILURE(error) || !standard_name) {
94	error = U_ZERO_ERROR;
95	standard_name = ucnv_getStandardName(name, "IANA", &error);
96	}
97	UConverter *conv = ucnv_open(standard_name, &error);
98	if (!U_FAILURE(error) && conv && standard_name) {
99	ucnv_close(conv);
100	printf(" { %d, %d },\n", mib, pos);
101	nameList += "\"";
102	nameList += standard_name;
103	nameList += "\\0\"\n";
104	pos += strlen(standard_name) + 1;
105	}
106	}
107	name = QByteArray();
108	mib = -1;
109	}
110	if (s.startsWith("Name: ")) {
111	name = s.mid(5).trimmed();
112	if (name.indexOf(' ') > 0)
113	name = name.left(name.indexOf(' '));
114	}
115	if (s.startsWith("MIBenum:"))
116	mib = s.mid(8).trimmed().toInt();
117	if (s.startsWith("Alias:") && s.contains("MIME")) {
118	name = s.mid(6).trimmed();
119	name = name.left(name.indexOf(' ')).trimmed();
120	}
121	}
122	qDebug() << nameList;
123	}
124	*/
125
// One row of the MIB lookup table.
struct MibToName {
    short mib;   // MIB number of the charset
    short index; // byte offset of the charset's name inside mibToNameTable
};
130
131	static const MibToName mibToName[] = {
132	{ .mib: `3`, .index: `0` },
133	{ .mib: `4`, .index: `9` },
134	{ .mib: `5`, .index: `20` },
135	{ .mib: `6`, .index: `31` },
136	{ .mib: `7`, .index: `42` },
137	{ .mib: `8`, .index: `53` },
138	{ .mib: `9`, .index: `64` },
139	{ .mib: `10`, .index: `75` },
140	{ .mib: `11`, .index: `86` },
141	{ .mib: `12`, .index: `97` },
142	{ .mib: `13`, .index: `108` },
143	{ .mib: `16`, .index: `120` },
144	{ .mib: `17`, .index: `134` },
145	{ .mib: `18`, .index: `144` },
146	{ .mib: `30`, .index: `151` },
147	{ .mib: `36`, .index: `160` },
148	{ .mib: `37`, .index: `167` },
149	{ .mib: `38`, .index: `179` },
150	{ .mib: `39`, .index: `186` },
151	{ .mib: `40`, .index: `198` },
152	{ .mib: `57`, .index: `212` },
153	{ .mib: `81`, .index: `223` },
154	{ .mib: `82`, .index: `234` },
155	{ .mib: `84`, .index: `245` },
156	{ .mib: `85`, .index: `256` },
157	{ .mib: `104`, .index: `267` },
158	{ .mib: `105`, .index: `279` },
159	{ .mib: `106`, .index: `295` },
160	{ .mib: `109`, .index: `301` },
161	{ .mib: `110`, .index: `313` },
162	{ .mib: `111`, .index: `325` },
163	{ .mib: `113`, .index: `337` },
164	{ .mib: `114`, .index: `341` },
165	{ .mib: `1000`, .index: `349` },
166	{ .mib: `1001`, .index: `356` },
167	{ .mib: `1011`, .index: `363` },
168	{ .mib: `1012`, .index: `368` },
169	{ .mib: `1013`, .index: `374` },
170	{ .mib: `1014`, .index: `383` },
171	{ .mib: `1015`, .index: `392` },
172	{ .mib: `1016`, .index: `399` },
173	{ .mib: `1017`, .index: `406` },
174	{ .mib: `1018`, .index: `413` },
175	{ .mib: `1019`, .index: `422` },
176	{ .mib: `1020`, .index: `431` },
177	{ .mib: `2004`, .index: `438` },
178	{ .mib: `2005`, .index: `448` },
179	{ .mib: `2009`, .index: `472` },
180	{ .mib: `2013`, .index: `479` },
181	{ .mib: `2016`, .index: `486` },
182	{ .mib: `2024`, .index: `495` },
183	{ .mib: `2025`, .index: `505` },
184	{ .mib: `2026`, .index: `512` },
185	{ .mib: `2027`, .index: `517` },
186	{ .mib: `2028`, .index: `527` },
187	{ .mib: `2030`, .index: `534` },
188	{ .mib: `2033`, .index: `541` },
189	{ .mib: `2034`, .index: `548` },
190	{ .mib: `2035`, .index: `555` },
191	{ .mib: `2037`, .index: `562` },
192	{ .mib: `2038`, .index: `569` },
193	{ .mib: `2039`, .index: `576` },
194	{ .mib: `2040`, .index: `583` },
195	{ .mib: `2041`, .index: `590` },
196	{ .mib: `2043`, .index: `597` },
197	{ .mib: `2011`, .index: `604` },
198	{ .mib: `2044`, .index: `611` },
199	{ .mib: `2045`, .index: `618` },
200	{ .mib: `2010`, .index: `624` },
201	{ .mib: `2046`, .index: `631` },
202	{ .mib: `2047`, .index: `638` },
203	{ .mib: `2048`, .index: `645` },
204	{ .mib: `2049`, .index: `652` },
205	{ .mib: `2050`, .index: `659` },
206	{ .mib: `2051`, .index: `666` },
207	{ .mib: `2052`, .index: `673` },
208	{ .mib: `2053`, .index: `680` },
209	{ .mib: `2054`, .index: `687` },
210	{ .mib: `2055`, .index: `694` },
211	{ .mib: `2056`, .index: `701` },
212	{ .mib: `2062`, .index: `708` },
213	{ .mib: `2063`, .index: `715` },
214	{ .mib: `2084`, .index: `723` },
215	{ .mib: `2085`, .index: `730` },
216	{ .mib: `2086`, .index: `741` },
217	{ .mib: `2087`, .index: `748` },
218	{ .mib: `2088`, .index: `755` },
219	{ .mib: `2089`, .index: `762` },
220	{ .mib: `2091`, .index: `771` },
221	{ .mib: `2092`, .index: `780` },
222	{ .mib: `2093`, .index: `789` },
223	{ .mib: `2094`, .index: `798` },
224	{ .mib: `2095`, .index: `807` },
225	{ .mib: `2096`, .index: `816` },
226	{ .mib: `2097`, .index: `825` },
227	{ .mib: `2098`, .index: `834` },
228	{ .mib: `2099`, .index: `843` },
229	{ .mib: `2100`, .index: `852` },
230	{ .mib: `2101`, .index: `861` },
231	{ .mib: `2102`, .index: `872` },
232	{ .mib: `2250`, .index: `880` },
233	{ .mib: `2251`, .index: `893` },
234	{ .mib: `2252`, .index: `906` },
235	{ .mib: `2253`, .index: `919` },
236	{ .mib: `2254`, .index: `932` },
237	{ .mib: `2255`, .index: `945` },
238	{ .mib: `2256`, .index: `958` },
239	{ .mib: `2257`, .index: `971` },
240	{ .mib: `2258`, .index: `984` },
241	{ .mib: `2259`, .index: `997` },
242	};
243	int mibToNameSize = sizeof(mibToName)/sizeof(MibToName);
244
// Charset names packed back to back, each terminated by an explicit '\0'.
// Entries are addressed by byte offset (see MibToName::index), so neither the
// order nor the spelling of any entry may change without regenerating the
// offsets.
static const char mibToNameTable[] =
    "US-ASCII\0"
    "ISO-8859-1\0"
    "ISO-8859-2\0"
    "ISO-8859-3\0"
    "ISO-8859-4\0"
    "ISO-8859-5\0"
    "ISO-8859-6\0"
    "ISO-8859-7\0"
    "ISO-8859-8\0"
    "ISO-8859-9\0"
    "ISO-8859-10\0"
    "ISO-2022-JP-1\0"
    "Shift_JIS\0"
    "EUC-JP\0"
    "US-ASCII\0"
    "EUC-KR\0"
    "ISO-2022-KR\0"
    "EUC-KR\0"
    "ISO-2022-JP\0"
    "ISO-2022-JP-2\0"
    "GB_2312-80\0"
    "ISO-8859-6\0"
    "ISO-8859-6\0"
    "ISO-8859-8\0"
    "ISO-8859-8\0"
    "ISO-2022-CN\0"
    "ISO-2022-CN-EXT\0"
    "UTF-8\0"
    "ISO-8859-13\0"
    "ISO-8859-14\0"
    "ISO-8859-15\0"
    "GBK\0"
    "GB18030\0"
    "UTF-16\0"
    "UTF-32\0"
    "SCSU\0"
    "UTF-7\0"
    "UTF-16BE\0"
    "UTF-16LE\0"
    "UTF-16\0"
    "CESU-8\0"
    "UTF-32\0"
    "UTF-32BE\0"
    "UTF-32LE\0"
    "BOCU-1\0"
    "hp-roman8\0"
    "Adobe-Standard-Encoding\0"
    "IBM850\0"
    "IBM862\0"
    "IBM-Thai\0"
    "Shift_JIS\0"
    "GB2312\0"
    "Big5\0"
    "macintosh\0"
    "IBM037\0"
    "IBM273\0"
    "IBM277\0"
    "IBM278\0"
    "IBM280\0"
    "IBM284\0"
    "IBM285\0"
    "IBM290\0"
    "IBM297\0"
    "IBM420\0"
    "IBM424\0"
    "IBM437\0"
    "IBM500\0"
    "cp851\0"
    "IBM852\0"
    "IBM855\0"
    "IBM857\0"
    "IBM860\0"
    "IBM861\0"
    "IBM863\0"
    "IBM864\0"
    "IBM865\0"
    "IBM868\0"
    "IBM869\0"
    "IBM870\0"
    "IBM871\0"
    "IBM918\0"
    "IBM1026\0"
    "KOI8-R\0"
    "HZ-GB-2312\0"
    "IBM866\0"
    "IBM775\0"
    "KOI8-U\0"
    "IBM00858\0"
    "IBM01140\0"
    "IBM01141\0"
    "IBM01142\0"
    "IBM01143\0"
    "IBM01144\0"
    "IBM01145\0"
    "IBM01146\0"
    "IBM01147\0"
    "IBM01148\0"
    "IBM01149\0"
    "Big5-HKSCS\0"
    "IBM1047\0"
    "windows-1250\0"
    "windows-1251\0"
    "windows-1252\0"
    "windows-1253\0"
    "windows-1254\0"
    "windows-1255\0"
    "windows-1256\0"
    "windows-1257\0"
    "windows-1258\0"
    "TIS-620\0";
356
357	static QTextCodec loadQtCodec(const* char *name)
358	{
359	if (!strcmp(s1: name, s2: "UTF-8"))
360	return new QUtf8Codec;
361	if (!strcmp(s1: name, s2: "UTF-16"))
362	return new QUtf16Codec;
363	if (!strcmp(s1: name, s2: "ISO-8859-1"))
364	return new QLatin1Codec;
365	if (!strcmp(s1: name, s2: "UTF-16BE"))
366	return new QUtf16BECodec;
367	if (!strcmp(s1: name, s2: "UTF-16LE"))
368	return new QUtf16LECodec;
369	if (!strcmp(s1: name, s2: "UTF-32"))
370	return new QUtf32Codec;
371	if (!strcmp(s1: name, s2: "UTF-32BE"))
372	return new QUtf32BECodec;
373	if (!strcmp(s1: name, s2: "UTF-32LE"))
374	return new QUtf32LECodec;
375	if (!strcmp(s1: name, s2: "ISO-8859-16") \|\| !strcmp(s1: name, s2: "latin10") \|\| !strcmp(s1: name, s2: "iso-ir-226"))
376	return new QSimpleTextCodec (`13` / == 8859-16/);
377	#if QT_CONFIG(codecs)
378	if (!strcmp(s1: name, s2: "TSCII"))
379	return new QTsciiCodec;
380	if (!qstrnicmp(name, "iscii", len: `5`))
381	return QIsciiCodec::create(name);
382	#endif
383
384	return nullptr;
385	}
386
387	/// \threadsafe
388	QList<QByteArray> QIcuCodec::availableCodecs()
389	{
390	QList<QByteArray> codecs;
391	int n = ucnv_countAvailable();
392	for (int i = `0`; i < n; ++i) {
393	const char *name = ucnv_getAvailableName(n: i);
394
395	UErrorCode error = U_ZERO_ERROR;
396	const char *standardName = ucnv_getStandardName(name, standard: "MIME", pErrorCode: &error);
397	if (U_FAILURE(code: error) \|\| !standardName) {
398	error = U_ZERO_ERROR;
399	standardName = ucnv_getStandardName(name, standard: "IANA", pErrorCode: &error);
400	}
401	if (U_FAILURE(code: error))
402	continue;
403
404	error = U_ZERO_ERROR;
405	int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error);
406	if (U_FAILURE(code: error))
407	continue;
408	for (int j = `0`; j < ac; ++j) {
409	error = U_ZERO_ERROR;
410	const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error);
411	if (!U_SUCCESS(code: error))
412	continue;
413	codecs += alias;
414	}
415	}
416
417	// handled by Qt and not in ICU:
418	codecs += "TSCII";
419
420	return codecs;
421	}
422
423	/// \threadsafe
424	QList<int> QIcuCodec::availableMibs()
425	{
426	QList<int> mibs;
427	mibs.reserve(alloc: mibToNameSize + `1`);
428	for (int i = `0`; i < mibToNameSize; ++i)
429	mibs += mibToName[i].mib;
430
431	// handled by Qt and not in ICU:
432	mibs += `2107`; // TSCII
433
434	return mibs;
435	}
436
437	QTextCodec *QIcuCodec::defaultCodecUnlocked()
438	{
439	QCoreGlobalData *globalData = QCoreGlobalData::instance();
440	if (!globalData)
441	return nullptr;
442	QTextCodec *c = globalData->codecForLocale.loadAcquire();
443	if (c)
444	return c;
445
446	#if defined(QT_LOCALE_IS_UTF8)
447	const char *name = "UTF-8";
448	#else
449	const char *name = ucnv_getDefaultName();
450	#endif
451	c = codecForNameUnlocked(name);
452	globalData->codecForLocale.storeRelease(newValue: c);
453	return c;
454	}
455
456
457	QTextCodec QIcuCodec::codecForNameUnlocked(const* char *name)
458	{
459	// backwards compatibility with Qt 4.x
460	if (!qstrcmp(str1: name, str2: "CP949"))
461	name = "windows-949";
462	else if (!qstrcmp(str1: name, str2: "Apple Roman"))
463	name = "macintosh";
464	// these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620
465	if (!qstrcmp(str1: name, str2: "windows-874-2000")
466	\|\| !qstrcmp(str1: name, str2: "windows-874")
467	\|\| !qstrcmp(str1: name, str2: "MS874")
468	\|\| !qstrcmp(str1: name, str2: "x-windows-874")
469	\|\| !qstrcmp(str1: name, str2: "ISO 8859-11"))
470	name = "TIS-620";
471
472	UErrorCode error = U_ZERO_ERROR;
473	// MIME gives better default names
474	const char *standardName = ucnv_getStandardName(name, standard: "MIME", pErrorCode: &error);
475	if (U_FAILURE(code: error) \|\| !standardName) {
476	error = U_ZERO_ERROR;
477	standardName = ucnv_getStandardName(name, standard: "IANA", pErrorCode: &error);
478	}
479	bool qt_only = false;
480	if (U_FAILURE(code: error) \|\| !standardName) {
481	standardName = name;
482	qt_only = true;
483	} else {
484	// correct some issues where the ICU data set contains duplicated entries.
485	// Where this happens it's because one data set is a subset of another. We
486	// always use the larger data set.
487
488	if (qstrcmp(str1: standardName, str2: "GB2312") == `0` \|\| qstrcmp(str1: standardName, str2: "GB_2312-80") == `0`)
489	standardName = "GBK";
490	else if (qstrcmp(str1: standardName, str2: "KSC_5601") == `0` \|\| qstrcmp(str1: standardName, str2: "EUC-KR") == `0` \|\| qstrcmp(str1: standardName, str2: "cp1363") == `0`)
491	standardName = "windows-949";
492	}
493
494	QCoreGlobalData *globalData = QCoreGlobalData::instance();
495	QTextCodecCache *cache = &globalData->codecCache;
496
497	QTextCodec *codec;
498	if (cache) {
499	codec = cache->value(akey: standardName);
500	if (codec)
501	return codec;
502	}
503
504	for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
505	QTextCodec cursor = it;
506	if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) {
507	if (cache)
508	cache->insert(akey: standardName, avalue: cursor);
509	return cursor;
510	}
511	QList<QByteArray> aliases = cursor->aliases();
512	for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) {
513	if (qTextCodecNameMatch(n: *ait, h: standardName)) {
514	if (cache)
515	cache->insert(akey: standardName, avalue: cursor);
516	return cursor;
517	}
518	}
519	}
520
521	QTextCodec *c = loadQtCodec(name: standardName);
522	if (c)
523	return c;
524
525	if (qt_only)
526	return nullptr;
527
528	// check whether there is really a converter for the name available.
529	UConverter *conv = ucnv_open(converterName: standardName, err: &error);
530	if (!conv) {
531	qDebug(msg: "codecForName: ucnv_open failed %s %s", standardName, u_errorName(code: error));
532	return nullptr;
533	}
534	//qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName;
535	ucnv_close(converter: conv);
536
537
538	c = new QIcuCodec (standardName);
539	if (cache)
540	cache->insert(akey: standardName, avalue: c);
541	return c;
542	}
543
544
545	QTextCodec QIcuCodec::codecForMibUnlocked(int* mib)
546	{
547	for (int i = `0`; i < mibToNameSize; ++i) {
548	if (mibToName[i].mib == mib)
549	return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index);
550	}
551
552	if (mib == `2107`)
553	return codecForNameUnlocked(name: "TSCII");
554
555	return nullptr;
556	}
557
558
559	QIcuCodec::QIcuCodec(const char *name)
560	: m_name(name)
561	{
562	}
563
564	QIcuCodec::~QIcuCodec()
565	{
566	}
567
568	/!*
569	\internal
570
571	Custom callback for the ICU from Unicode conversion. It's invoked when the
572	conversion from Unicode detects illegal or unrecognized character.
573
574	Assumes that context contains a pointer to QTextCodec::ConverterState
575	structure. Updates its invalid characters count and calls a default
576	callback, that replaces the invalid characters properly.
577	*/
578	static void customFromUnicodeSubstitutionCallback(const void *context,
579	UConverterFromUnicodeArgs *fromUArgs,
580	const UChar *codeUnits,
581	int32_t length,
582	UChar32 codePoint,
583	UConverterCallbackReason reason,
584	UErrorCode *err)
585	{
586	auto state = reinterpret_cast<QTextCodec::ConverterState >(const_cast<void *>(context));
587	if (state)
588	state->invalidChars++;
589	// Call the default callback that replaces all illegal or unrecognized
590	// sequences with the substitute string
591	UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err);
592	}
593
594	UConverter QIcuCodec::getConverter(QTextCodec::ConverterState state) const
595	{
596	UConverter conv = nullptr*;
597	if (state) {
598	if (!state->d) {
599	// first time
600	state->flags \|= QTextCodec::FreeFunction;
601	QTextCodecUnalignedPointer::encode(dst: state->state_data, fn: qIcuCodecStateFree);
602	UErrorCode error = U_ZERO_ERROR;
603	state->d = ucnv_open(converterName: m_name, err: &error);
604	ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d),
605	subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?", len: `1`, err: &error);
606	if (!U_FAILURE(code: error)) {
607	error = U_ZERO_ERROR;
608	ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d),
609	newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr,
610	oldContext: nullptr, err: &error);
611	if (U_FAILURE(code: error)) {
612	qDebug(msg: "getConverter(state) failed to install custom callback. "
613	"canEncode() may report incorrect results.");
614	}
615	} else {
616	qDebug(msg: "getConverter(state) ucnv_open failed %s %s", m_name, u_errorName(code: error));
617	}
618	}
619	conv = static_cast<UConverter *>(state->d);
620	}
621	if (!conv) {
622	// stateless conversion
623	UErrorCode error = U_ZERO_ERROR;
624	conv = ucnv_open(converterName: m_name, err: &error);
625	ucnv_setSubstChars(converter: conv, subChars: "?", len: `1`, err: &error);
626	if (U_FAILURE(code: error))
627	qDebug(msg: "getConverter(no state) ucnv_open failed %s %s", m_name, u_errorName(code: error));
628	}
629	return conv;
630	}
631
632	QString QIcuCodec::convertToUnicode(const char chars, int* length, QTextCodec::ConverterState state) const*
633	{
634	UConverter *conv = getConverter(state);
635
636	QString string(length + `2`, Qt::Uninitialized);
637
638	const char *end = chars + length;
639	int convertedChars = `0`;
640	while (`1`) {
641	UChar uc = (UChar )string.data();
642	UChar *ucEnd = uc + string.length();
643	uc += convertedChars;
644	UErrorCode error = U_ZERO_ERROR;
645	ucnv_toUnicode(converter: conv,
646	target: &uc, targetLimit: ucEnd,
647	source: &chars, sourceLimit: end,
648	offsets: nullptr, flush: false, err: &error);
649	if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) {
650	qDebug(msg: "convertToUnicode failed: %s", u_errorName(code: error));
651	break;
652	}
653
654	convertedChars = uc - (UChar *)string.data();
655	if (chars >= end)
656	break;
657	string.resize(size: string.length()*`2`);
658	}
659	string.resize(size: convertedChars);
660
661	if (!state)
662	ucnv_close(converter: conv);
663	return string;
664	}
665
666
667	QByteArray QIcuCodec::convertFromUnicode(const QChar unicode, int* length, QTextCodec::ConverterState state) const*
668	{
669	UConverter *conv = getConverter(state);
670
671	int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
672	QByteArray string(requiredLength, Qt::Uninitialized);
673
674	const UChar uc = (const* UChar *)unicode;
675	const UChar *end = uc + length;
676	int convertedChars = `0`;
677	while (`1`) {
678	char ch = (char* *)string.data();
679	char *chEnd = ch + string.length();
680	ch += convertedChars;
681	UErrorCode error = U_ZERO_ERROR;
682	ucnv_fromUnicode(converter: conv,
683	target: &ch, targetLimit: chEnd,
684	source: &uc, sourceLimit: end,
685	offsets: nullptr, flush: false, err: &error);
686	if (!U_SUCCESS(code: error))
687	qDebug(msg: "convertFromUnicode failed: %s", u_errorName(code: error));
688	convertedChars = ch - string.data();
689	if (uc >= end)
690	break;
691	string.resize(size: string.length()*`2`);
692	}
693	string.resize(size: convertedChars);
694
695	if (!state)
696	ucnv_close(converter: conv);
697
698	return string;
699	}
700
701
702	QByteArray QIcuCodec::name() const
703	{
704	return m_name;
705	}
706
707
708	QList<QByteArray> QIcuCodec::aliases() const
709	{
710	UErrorCode error = U_ZERO_ERROR;
711
712	int n = ucnv_countAliases(alias: m_name, pErrorCode: &error);
713
714	QList<QByteArray> aliases;
715	for (int i = `0`; i < n; ++i) {
716	const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error);
717	// skip the canonical name
718	if (!a \|\| !qstrcmp(str1: a, str2: m_name))
719	continue;
720	aliases += a;
721	}
722
723	return aliases;
724	}
725
726
727	int QIcuCodec::mibEnum() const
728	{
729	for (int i = `0`; i < mibToNameSize; ++i) {
730	if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index)))
731	return mibToName[i].mib;
732	}
733
734	return `0`;
735	}
736
737	QT_END_NAMESPACE
738

Provided by KDAB

Definitions

source code of qtbase/src/corelib/codecs/qicucodec.cpp