1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2018 The Qt Company Ltd. |
4 | ** Copyright (C) 2018 Intel Corporation. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #include "qplatformdefs.h" |
42 | |
43 | #include "qtextcodec.h" |
44 | #include "qtextcodec_p.h" |
45 | |
46 | #include "qbytearraymatcher.h" |
47 | #include "qendian.h" |
48 | #include "qfile.h" |
49 | #include "qlist.h" |
50 | #include <private/qlocking_p.h> |
51 | #include "qstringlist.h" |
52 | #include "qvarlengtharray.h" |
53 | #if !defined(QT_BOOTSTRAPPED) |
54 | #include <private/qcoreapplication_p.h> |
55 | #endif |
56 | #include "private/qcoreglobaldata_p.h" |
57 | |
58 | #include "qutfcodec_p.h" |
59 | #include "qlatincodec_p.h" |
60 | |
61 | #if !defined(QT_BOOTSTRAPPED) |
62 | #if QT_CONFIG(codecs) |
63 | # include "qtsciicodec_p.h" |
64 | # include "qisciicodec_p.h" |
65 | #endif |
66 | #if QT_CONFIG(icu) |
67 | #include "qicucodec_p.h" |
68 | #else |
69 | #if QT_CONFIG(iconv) |
70 | # include "qiconvcodec_p.h" |
71 | #endif |
72 | #ifdef Q_OS_WIN |
73 | # include "qwindowscodec_p.h" |
74 | #endif |
75 | # include "qsimplecodec_p.h" |
76 | #if QT_CONFIG(big_codecs) |
77 | # ifndef Q_OS_INTEGRITY |
78 | # include "qgb18030codec_p.h" |
79 | # include "qeucjpcodec_p.h" |
80 | # include "qjiscodec_p.h" |
81 | # include "qsjiscodec_p.h" |
82 | # include "qeuckrcodec_p.h" |
83 | # include "qbig5codec_p.h" |
84 | # endif // !Q_OS_INTEGRITY |
85 | #endif // big_codecs |
86 | |
87 | #endif // icu |
88 | #endif // QT_BOOTSTRAPPED |
89 | |
90 | #include <mutex> |
91 | |
92 | #include <stdlib.h> |
93 | #include <ctype.h> |
94 | #include <locale.h> |
95 | #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_ANDROID) |
96 | # include <langinfo.h> |
97 | #endif |
98 | |
99 | QT_BEGIN_NAMESPACE |
100 | |
101 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; |
102 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; |
103 | |
104 | Q_GLOBAL_STATIC(QRecursiveMutex, textCodecsMutex); |
105 | |
106 | class TextCodecsMutexLocker |
107 | { |
108 | using Lock = decltype(qt_unique_lock(mutex&: std::declval<QRecursiveMutex&>())); |
109 | // ### FIXME: this is used when textCodecsMutex already == nullptr |
110 | const Lock lock = qt_unique_lock(mutex: textCodecsMutex()); |
111 | public: |
112 | TextCodecsMutexLocker() {} // required d/t an ICC 19 bug |
113 | }; |
114 | |
115 | #if !QT_CONFIG(icu) |
// Locale-independent lower-casing of an ASCII letter; every other character
// is returned unchanged.
static char qtolower(char c)
{
    const bool isUpper = (c >= 'A' && c <= 'Z');
    return isUpper ? char(c + 0x20) : c;
}
// Locale-independent test for an ASCII digit or ASCII letter.
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    // Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check
    // covers both cases.
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
120 | |
121 | bool qTextCodecNameMatch(const char *n, const char *h) |
122 | { |
123 | if (qstricmp(n, h) == 0) |
124 | return true; |
125 | |
126 | // if the letters and numbers are the same, we have a match |
127 | while (*n != '\0') { |
128 | if (qisalnum(*n)) { |
129 | for (;;) { |
130 | if (*h == '\0') |
131 | return false; |
132 | if (qisalnum(*h)) |
133 | break; |
134 | ++h; |
135 | } |
136 | if (qtolower(*n) != qtolower(*h)) |
137 | return false; |
138 | ++h; |
139 | } |
140 | ++n; |
141 | } |
142 | while (*h && !qisalnum(*h)) |
143 | ++h; |
144 | return (*h == '\0'); |
145 | } |
146 | |
147 | |
148 | #if !defined(Q_OS_WIN32) && !defined(QT_LOCALE_IS_UTF8) |
149 | static QTextCodec *checkForCodec(const QByteArray &name) { |
150 | QTextCodec *c = QTextCodec::codecForName(name); |
151 | if (!c) { |
152 | const int index = name.indexOf('@'); |
153 | if (index != -1) { |
154 | c = QTextCodec::codecForName(name.left(index)); |
155 | } |
156 | } |
157 | return c; |
158 | } |
159 | #endif |
160 | |
// Forward declaration; setup() is defined further down in this file and is
// called by setupLocaleMapper() while holding textCodecsMutex.
static void setup();
162 | |
163 | // \threadsafe |
164 | // this returns the codec the method sets up as locale codec to |
165 | // avoid a race condition in codecForLocale() when |
166 | // setCodecForLocale(0) is called at the same time. |
167 | static QTextCodec *setupLocaleMapper() |
168 | { |
169 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
170 | |
171 | QTextCodec *locale = nullptr; |
172 | |
173 | { |
174 | const TextCodecsMutexLocker locker; |
175 | if (globalData->allCodecs.isEmpty()) |
176 | setup(); |
177 | } |
178 | |
179 | #if !defined(QT_BOOTSTRAPPED) |
180 | QCoreApplicationPrivate::initLocale(); |
181 | #endif |
182 | |
183 | #if defined(QT_LOCALE_IS_UTF8) |
184 | locale = QTextCodec::codecForName("UTF-8" ); |
185 | #elif defined(Q_OS_WIN) |
186 | locale = QTextCodec::codecForName("System" ); |
187 | #else |
188 | |
189 | // First try getting the codecs name from nl_langinfo and see |
190 | // if we have a builtin codec for it. |
191 | // Only fall back to using iconv if we can't find a builtin codec |
192 | // This is because the builtin utf8 codec is around 5 times faster |
193 | // then the using QIconvCodec |
194 | |
195 | #if defined (_XOPEN_UNIX) |
196 | char *charset = nl_langinfo(CODESET); |
197 | if (charset) |
198 | locale = QTextCodec::codecForName(charset); |
199 | #endif |
200 | #if QT_CONFIG(iconv) |
201 | if (!locale) { |
202 | // no builtin codec for the locale found, let's try using iconv |
203 | (void) new QIconvCodec(); |
204 | locale = QTextCodec::codecForName("System" ); |
205 | } |
206 | #endif |
207 | |
208 | if (!locale) { |
209 | // Very poorly defined and followed standards causes lots of |
210 | // code to try to get all the cases... This logic is |
211 | // duplicated in QIconvCodec, so if you change it here, change |
212 | // it there too. |
213 | |
214 | // Try to determine locale codeset from locale name assigned to |
215 | // LC_CTYPE category. |
216 | |
217 | // First part is getting that locale name. First try setlocale() which |
218 | // definitely knows it, but since we cannot fully trust it, get ready |
219 | // to fall back to environment variables. |
220 | const QByteArray ctype = setlocale(LC_CTYPE, nullptr); |
221 | |
222 | // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG |
223 | // environment variables. |
224 | QByteArray lang = qgetenv("LC_ALL" ); |
225 | if (lang.isEmpty() || lang == "C" ) { |
226 | lang = qgetenv("LC_CTYPE" ); |
227 | } |
228 | if (lang.isEmpty() || lang == "C" ) { |
229 | lang = qgetenv("LANG" ); |
230 | } |
231 | |
232 | // Now try these in order: |
233 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) |
234 | // 2. CODESET from lang if it contains a .CODESET part |
235 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something) |
236 | // 4. locale (ditto) |
237 | // 5. check for "@euro" |
238 | // 6. guess locale from ctype unless ctype is "C" |
239 | // 7. guess locale from lang |
240 | |
241 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) |
242 | int indexOfDot = ctype.indexOf('.'); |
243 | if (indexOfDot != -1) |
244 | locale = checkForCodec( ctype.mid(indexOfDot + 1) ); |
245 | |
246 | // 2. CODESET from lang if it contains a .CODESET part |
247 | if (!locale) { |
248 | indexOfDot = lang.indexOf('.'); |
249 | if (indexOfDot != -1) |
250 | locale = checkForCodec( lang.mid(indexOfDot + 1) ); |
251 | } |
252 | |
253 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something) |
254 | if (!locale && !ctype.isEmpty() && ctype != "C" ) |
255 | locale = checkForCodec(ctype); |
256 | |
257 | // 4. locale (ditto) |
258 | if (!locale && !lang.isEmpty()) |
259 | locale = checkForCodec(lang); |
260 | |
261 | // 5. "@euro" |
262 | if ((!locale && ctype.contains("@euro" )) || lang.contains("@euro" )) |
263 | locale = checkForCodec("ISO 8859-15" ); |
264 | } |
265 | |
266 | #endif |
267 | // If everything failed, we default to 8859-1 |
268 | if (!locale) |
269 | locale = QTextCodec::codecForName("ISO 8859-1" ); |
270 | globalData->codecForLocale.storeRelease(locale); |
271 | return locale; |
272 | } |
273 | |
274 | |
// Creates the built-in codecs once per process.
//
// textCodecsMutex needs to be locked to enter this function: the
// "initialized" flag below is a plain static bool with no synchronization of
// its own.
//
// The codec objects are created with new and the pointers are discarded
// here. Presumably each class derives from QTextCodec, whose constructor
// registers the object by prepending it to the global codec list; if so,
// codecs created later in this function are found first by lookups that walk
// the list front to back, so the order of the statements matters.
static void setup()
{
    static bool initialized = false;
    if (initialized)
        return;
    // Set before constructing anything, so that a nested call made while the
    // codecs below are being created returns immediately.
    initialized = true;

#if QT_CONFIG(codecs) && !defined(QT_BOOTSTRAPPED)
    (void)new QTsciiCodec;
    for (int i = 0; i < 9; ++i)
        (void)new QIsciiCodec(i);
    for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
        (void)new QSimpleTextCodec(i);

# if QT_CONFIG(big_codecs) && !defined(Q_OS_INTEGRITY)
    (void)new QGb18030Codec;
    (void)new QGbkCodec;
    (void)new QGb2312Codec;
    (void)new QEucJpCodec;
    (void)new QJisCodec;
    (void)new QSjisCodec;
    (void)new QEucKrCodec;
    (void)new QCP949Codec;
    (void)new QBig5Codec;
    (void)new QBig5hkscsCodec;
# endif // big_codecs && !Q_OS_INTEGRITY
#if QT_CONFIG(iconv)
    (void) new QIconvCodec;
#endif
#if defined(Q_OS_WIN32)
    (void) new QWindowsLocalCodec;
#endif // Q_OS_WIN32
#endif // codecs && !QT_BOOTSTRAPPED

    // The Unicode and Latin codecs are created unconditionally, and last.
    (void)new QUtf16Codec;
    (void)new QUtf16BECodec;
    (void)new QUtf16LECodec;
    (void)new QUtf32Codec;
    (void)new QUtf32BECodec;
    (void)new QUtf32LECodec;
    (void)new QLatin15Codec;
    (void)new QLatin1Codec;
    (void)new QUtf8Codec;
}
320 | #else |
// ICU build: no built-in codecs are registered by this function; name
// lookups are forwarded to QIcuCodec (see QTextCodec::codecForName()).
static void setup() {}
322 | #endif // icu |
323 | |
324 | /*! |
325 | \enum QTextCodec::ConversionFlag |
326 | |
327 | \value DefaultConversion No flag is set. |
328 | \value ConvertInvalidToNull If this flag is set, each invalid input |
329 | character is output as a null character. |
330 | \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any. |
331 | |
332 | \omitvalue FreeFunction |
333 | */ |
334 | |
335 | /*! |
336 | \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags) |
337 | |
338 | Constructs a ConverterState object initialized with the given \a flags. |
339 | */ |
340 | |
341 | /*! |
342 | Destroys the ConverterState object. |
343 | */ |
344 | QTextCodec::ConverterState::~ConverterState() |
345 | { |
346 | if (flags & FreeFunction) |
347 | (QTextCodecUnalignedPointer::decode(src: state_data))(this); |
348 | else if (d) |
349 | free(ptr: d); |
350 | } |
351 | |
352 | /*! |
353 | \class QTextCodec |
354 | \inmodule QtCore |
355 | \brief The QTextCodec class provides conversions between text encodings. |
356 | \reentrant |
357 | \ingroup i18n |
358 | |
359 | Qt uses Unicode to store, draw and manipulate strings. In many |
360 | situations you may wish to deal with data that uses a different |
361 | encoding. For example, most Japanese documents are still stored |
362 | in Shift-JIS or ISO 2022-JP, while Russian users often have their |
363 | documents in KOI8-R or Windows-1251. |
364 | |
365 | Qt provides a set of QTextCodec classes to help with converting |
366 | non-Unicode formats to and from Unicode. You can also create your |
367 | own codec classes. |
368 | |
369 | The supported encodings are: |
370 | |
371 | \list |
372 | \li \l{Big5 Text Codec}{Big5} |
373 | \li \l{Big5-HKSCS Text Codec}{Big5-HKSCS} |
374 | \li CP949 |
375 | \li \l{EUC-JP Text Codec}{EUC-JP} |
376 | \li \l{EUC-KR Text Codec}{EUC-KR} |
377 | \li \l{GBK Text Codec}{GB18030} |
378 | \li HP-ROMAN8 |
379 | \li IBM 850 |
380 | \li IBM 866 |
381 | \li IBM 874 |
382 | \li \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP} |
383 | \li ISO 8859-1 to 10 |
384 | \li ISO 8859-13 to 16 |
385 | \li Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml |
386 | \li KOI8-R |
387 | \li KOI8-U |
388 | \li Macintosh |
389 | \li \l{Shift-JIS Text Codec}{Shift-JIS} |
390 | \li TIS-620 |
391 | \li \l{TSCII Text Codec}{TSCII} |
392 | \li UTF-8 |
393 | \li UTF-16 |
394 | \li UTF-16BE |
395 | \li UTF-16LE |
396 | \li UTF-32 |
397 | \li UTF-32BE |
398 | \li UTF-32LE |
399 | \li Windows-1250 to 1258 |
400 | \endlist |
401 | |
402 | If Qt is compiled with ICU support enabled, most codecs supported by |
403 | ICU will also be available to the application. |
404 | |
405 | \l {QTextCodec}s can be used as follows to convert some locally encoded |
406 | string to Unicode. Suppose you have some string encoded in Russian |
407 | KOI8-R encoding, and want to convert it to Unicode. The simple way |
408 | to do it is like this: |
409 | |
410 | \snippet code/src_corelib_codecs_qtextcodec.cpp 0 |
411 | |
412 | After this, \c string holds the text converted to Unicode. |
413 | Converting a string from Unicode to the local encoding is just as |
414 | easy: |
415 | |
416 | \snippet code/src_corelib_codecs_qtextcodec.cpp 1 |
417 | |
418 | To read or write files in various encodings, use QTextStream and |
419 | its \l{QTextStream::setCodec()}{setCodec()} function. See the |
420 | \l{tools/codecs}{Codecs} example for an application of QTextCodec |
421 | to file I/O. |
422 | |
423 | Some care must be taken when trying to convert the data in chunks, |
424 | for example, when receiving it over a network. In such cases it is |
425 | possible that a multi-byte character will be split over two |
426 | chunks. At best this might result in the loss of a character and |
427 | at worst cause the entire conversion to fail. |
428 | |
429 | The approach to use in these situations is to create a QTextDecoder |
430 | object for the codec and use this QTextDecoder for the whole |
431 | decoding process, as shown below: |
432 | |
433 | \snippet code/src_corelib_codecs_qtextcodec.cpp 2 |
434 | |
435 | The QTextDecoder object maintains state between chunks and therefore |
436 | works correctly even if a multi-byte character is split between |
437 | chunks. |
438 | |
439 | \section1 Creating Your Own Codec Class |
440 | |
441 | Support for new text encodings can be added to Qt by creating |
442 | QTextCodec subclasses. |
443 | |
444 | The pure virtual functions describe the encoder to the system and |
445 | the coder is used as required in the different text file formats |
446 | supported by QTextStream, and under X11, for the locale-specific |
447 | character input and output. |
448 | |
449 | To add support for another encoding to Qt, make a subclass of |
450 | QTextCodec and implement the functions listed in the table below. |
451 | |
452 | \table |
453 | \header \li Function \li Description |
454 | |
455 | \row \li name() |
456 | \li Returns the official name for the encoding. If the |
457 | encoding is listed in the |
458 | \l{IANA character-sets encoding file}, the name |
459 | should be the preferred MIME name for the encoding. |
460 | |
461 | \row \li aliases() |
462 | \li Returns a list of alternative names for the encoding. |
463 | QTextCodec provides a default implementation that returns |
464 | an empty list. For example, "ISO-8859-1" has "latin1", |
465 | "CP819", "IBM819", and "iso-ir-100" as aliases. |
466 | |
467 | \row \li \l{QTextCodec::mibEnum()}{mibEnum()} |
468 | \li Return the MIB enum for the encoding if it is listed in |
469 | the \l{IANA character-sets encoding file}. |
470 | |
471 | \row \li convertToUnicode() |
472 | \li Converts an 8-bit character string to Unicode. |
473 | |
474 | \row \li convertFromUnicode() |
475 | \li Converts a Unicode string to an 8-bit character string. |
476 | \endtable |
477 | |
478 | \sa QTextStream, QTextDecoder, QTextEncoder, {Text Codecs Example} |
479 | */ |
480 | |
481 | /*! |
482 | Constructs a QTextCodec, and gives it the highest precedence. The |
483 | QTextCodec should always be constructed on the heap (i.e. with \c |
484 | new). Qt takes ownership and will delete it when the application |
485 | terminates. |
486 | */ |
487 | QTextCodec::QTextCodec() |
488 | { |
489 | const TextCodecsMutexLocker locker; |
490 | |
491 | QCoreGlobalData *globalInstance = QCoreGlobalData::instance(); |
492 | if (globalInstance->allCodecs.isEmpty()) |
493 | setup(); |
494 | |
495 | globalInstance->allCodecs.prepend(t: this); |
496 | } |
497 | |
498 | |
499 | /*! |
500 | \nonreentrant |
501 | |
502 | Destroys the QTextCodec. Note that you should not delete codecs |
503 | yourself: once created they become Qt's responsibility. |
504 | */ |
505 | QTextCodec::~QTextCodec() |
506 | { |
507 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
508 | if (!globalData) |
509 | return; |
510 | |
511 | globalData->codecForLocale.testAndSetRelaxed(expectedValue: this, newValue: nullptr); |
512 | |
513 | const TextCodecsMutexLocker locker; |
514 | |
515 | globalData->allCodecs.removeOne(t: this); |
516 | |
517 | auto it = globalData->codecCache.begin(); |
518 | |
519 | while (it != globalData->codecCache.end()) { |
520 | if (it.value() == this) |
521 | it = globalData->codecCache.erase(it); |
522 | else |
523 | ++it; |
524 | } |
525 | } |
526 | |
527 | /*! |
528 | \fn QTextCodec *QTextCodec::codecForName(const char *name) |
529 | |
    Searches all installed QTextCodec objects and returns the one
    which best matches \a name; the match is case-insensitive. Returns
    \nullptr if no codec matching the name \a name could be found.
533 | */ |
534 | |
535 | /*! |
536 | \threadsafe |
    Searches all installed QTextCodec objects and returns the one
    which best matches \a name; the match is case-insensitive. Returns
    \nullptr if no codec matching the name \a name could be found.
540 | */ |
541 | QTextCodec *QTextCodec::codecForName(const QByteArray &name) |
542 | { |
543 | if (name.isEmpty()) |
544 | return nullptr; |
545 | |
546 | const TextCodecsMutexLocker locker; |
547 | |
548 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
549 | if (!globalData) |
550 | return nullptr; |
551 | setup(); |
552 | |
553 | #if !QT_CONFIG(icu) |
554 | QTextCodecCache *cache = &globalData->codecCache; |
555 | QTextCodec *codec; |
556 | codec = cache->value(name); |
557 | if (codec) |
558 | return codec; |
559 | |
560 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
561 | QTextCodec *cursor = *it; |
562 | if (qTextCodecNameMatch(cursor->name(), name)) { |
563 | if (cache) |
564 | cache->insert(name, cursor); |
565 | return cursor; |
566 | } |
567 | QList<QByteArray> aliases = cursor->aliases(); |
568 | for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { |
569 | if (qTextCodecNameMatch(*ait, name)) { |
570 | cache->insert(name, cursor); |
571 | return cursor; |
572 | } |
573 | } |
574 | } |
575 | |
576 | return nullptr; |
577 | #else |
578 | return QIcuCodec::codecForNameUnlocked(name); |
579 | #endif |
580 | } |
581 | |
582 | |
583 | /*! |
584 | \threadsafe |
585 | Returns the QTextCodec which matches the |
586 | \l{QTextCodec::mibEnum()}{MIBenum} \a mib. |
587 | */ |
588 | QTextCodec* QTextCodec::codecForMib(int mib) |
589 | { |
590 | const TextCodecsMutexLocker locker; |
591 | |
592 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
593 | if (!globalData) |
594 | return nullptr; |
595 | if (globalData->allCodecs.isEmpty()) |
596 | setup(); |
597 | |
598 | QByteArray key = "MIB: " + QByteArray::number(mib); |
599 | |
600 | QTextCodecCache *cache = &globalData->codecCache; |
601 | QTextCodec *codec; |
602 | if (cache) { |
603 | codec = cache->value(akey: key); |
604 | if (codec) |
605 | return codec; |
606 | } |
607 | |
608 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
609 | QTextCodec *cursor = *it; |
610 | if (cursor->mibEnum() == mib) { |
611 | if (cache) |
612 | cache->insert(akey: key, avalue: cursor); |
613 | return cursor; |
614 | } |
615 | } |
616 | |
617 | #if QT_CONFIG(icu) |
618 | return QIcuCodec::codecForMibUnlocked(mib); |
619 | #else |
620 | return nullptr; |
621 | #endif |
622 | } |
623 | |
624 | /*! |
625 | \threadsafe |
626 | Returns the list of all available codecs, by name. Call |
627 | QTextCodec::codecForName() to obtain the QTextCodec for the name. |
628 | |
629 | The list may contain many mentions of the same codec |
630 | if the codec has aliases. |
631 | |
632 | \sa availableMibs(), name(), aliases() |
633 | */ |
634 | QList<QByteArray> QTextCodec::availableCodecs() |
635 | { |
636 | const TextCodecsMutexLocker locker; |
637 | |
638 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
639 | if (globalData->allCodecs.isEmpty()) |
640 | setup(); |
641 | |
642 | QList<QByteArray> codecs; |
643 | |
644 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
645 | codecs += (*it)->name(); |
646 | codecs += (*it)->aliases(); |
647 | } |
648 | |
649 | #if QT_CONFIG(icu) |
650 | codecs += QIcuCodec::availableCodecs(); |
651 | #endif |
652 | |
653 | return codecs; |
654 | } |
655 | |
656 | /*! |
657 | \threadsafe |
658 | Returns the list of MIBs for all available codecs. Call |
659 | QTextCodec::codecForMib() to obtain the QTextCodec for the MIB. |
660 | |
661 | \sa availableCodecs(), mibEnum() |
662 | */ |
663 | QList<int> QTextCodec::availableMibs() |
664 | { |
665 | #if QT_CONFIG(icu) |
666 | return QIcuCodec::availableMibs(); |
667 | #else |
668 | const TextCodecsMutexLocker locker; |
669 | |
670 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
671 | if (globalData->allCodecs.isEmpty()) |
672 | setup(); |
673 | |
674 | QList<int> codecs; |
675 | |
676 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) |
677 | codecs += (*it)->mibEnum(); |
678 | |
679 | return codecs; |
680 | #endif |
681 | } |
682 | |
683 | /*! |
684 | \nonreentrant |
685 | |
686 | Set the codec to \a c; this will be returned by |
687 | codecForLocale(). If \a c is \nullptr, the codec is reset to |
688 | the default. |
689 | |
690 | This might be needed for some applications that want to use their |
691 | own mechanism for setting the locale. |
692 | |
693 | \sa codecForLocale() |
694 | */ |
695 | void QTextCodec::setCodecForLocale(QTextCodec *c) |
696 | { |
697 | QCoreGlobalData::instance()->codecForLocale.storeRelease(newValue: c); |
698 | } |
699 | |
700 | /*! |
701 | \threadsafe |
702 | Returns a pointer to the codec most suitable for this locale. |
703 | |
704 | The codec will be retrieved from ICU where that backend is in use, otherwise |
705 | it may be obtained from an OS-specific API. In the latter case, the codec's |
706 | name may be "System". |
707 | */ |
708 | |
709 | QTextCodec* QTextCodec::codecForLocale() |
710 | { |
711 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
712 | if (!globalData) |
713 | return nullptr; |
714 | |
715 | QTextCodec *codec = globalData->codecForLocale.loadAcquire(); |
716 | if (!codec) { |
717 | #if QT_CONFIG(icu) |
718 | const TextCodecsMutexLocker locker; |
719 | codec = QIcuCodec::defaultCodecUnlocked(); |
720 | #else |
721 | // setupLocaleMapper locks as necessary |
722 | codec = setupLocaleMapper(); |
723 | #endif |
724 | } |
725 | |
726 | return codec; |
727 | } |
728 | |
729 | |
730 | /*! |
731 | \fn QByteArray QTextCodec::name() const |
732 | |
733 | QTextCodec subclasses must reimplement this function. It returns |
734 | the name of the encoding supported by the subclass. |
735 | |
736 | If the codec is registered as a character set in the |
737 | \l{IANA character-sets encoding file} this method should |
738 | return the preferred mime name for the codec if defined, |
739 | otherwise its name. |
740 | */ |
741 | |
742 | /*! |
743 | \fn int QTextCodec::mibEnum() const |
744 | |
745 | Subclasses of QTextCodec must reimplement this function. It |
746 | returns the \l{QTextCodec::mibEnum()}{MIBenum} (see \l{IANA character-sets encoding file} |
747 | for more information). It is important that each QTextCodec |
748 | subclass returns the correct unique value for this function. |
749 | */ |
750 | |
751 | /*! |
752 | Subclasses can return a number of aliases for the codec in question. |
753 | |
754 | Standard aliases for codecs can be found in the |
755 | \l{IANA character-sets encoding file}. |
756 | */ |
757 | QList<QByteArray> QTextCodec::aliases() const |
758 | { |
759 | return QList<QByteArray>(); |
760 | } |
761 | |
762 | /*! |
763 | \fn QString QTextCodec::convertToUnicode(const char *chars, int len, |
764 | ConverterState *state) const |
765 | |
766 | QTextCodec subclasses must reimplement this function. |
767 | |
768 | Converts the first \a len characters of \a chars from the |
769 | encoding of the subclass to Unicode, and returns the result in a |
770 | QString. |
771 | |
772 | \a state can be \nullptr, in which case the conversion is stateless and |
773 | default conversion rules should be used. If state is not 0, the |
774 | codec should save the state after the conversion in \a state, and |
775 | adjust the \c remainingChars and \c invalidChars members of the struct. |
776 | */ |
777 | |
778 | /*! |
779 | \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number, |
780 | ConverterState *state) const |
781 | |
782 | QTextCodec subclasses must reimplement this function. |
783 | |
784 | Converts the first \a number of characters from the \a input array |
785 | from Unicode to the encoding of the subclass, and returns the result |
786 | in a QByteArray. |
787 | |
788 | \a state can be \nullptr in which case the conversion is stateless and |
789 | default conversion rules should be used. If state is not 0, the |
790 | codec should save the state after the conversion in \a state, and |
791 | adjust the \c remainingChars and \c invalidChars members of the struct. |
792 | */ |
793 | |
794 | /*! |
795 | Creates a QTextDecoder with a specified \a flags to decode chunks |
796 | of \c{char *} data to create chunks of Unicode data. |
797 | |
798 | The caller is responsible for deleting the returned object. |
799 | |
800 | \since 4.7 |
801 | */ |
802 | QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const |
803 | { |
804 | return new QTextDecoder(this, flags); |
805 | } |
806 | |
807 | /*! |
808 | Creates a QTextEncoder with a specified \a flags to encode chunks |
809 | of Unicode data as \c{char *} data. |
810 | |
811 | The caller is responsible for deleting the returned object. |
812 | |
813 | \since 4.7 |
814 | */ |
815 | QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const |
816 | { |
817 | return new QTextEncoder(this, flags); |
818 | } |
819 | |
820 | /*! |
821 | \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number, |
822 | ConverterState *state) const |
823 | |
824 | Converts the first \a number of characters from the \a input array |
825 | from Unicode to the encoding of this codec, and returns the result |
826 | in a QByteArray. |
827 | |
    The \a state of the converter used is updated.
829 | */ |
830 | |
831 | #if QT_STRINGVIEW_LEVEL < 2 |
832 | /*! |
833 | Converts \a str from Unicode to the encoding of this codec, and |
834 | returns the result in a QByteArray. |
835 | */ |
836 | QByteArray QTextCodec::fromUnicode(const QString& str) const |
837 | { |
838 | return convertFromUnicode(in: str.constData(), length: str.length(), state: nullptr); |
839 | } |
840 | #endif |
841 | |
842 | /*! |
843 | \overload |
844 | \since 5.10 |
845 | |
846 | Converts \a str from Unicode to the encoding of this codec, and |
847 | returns the result in a QByteArray. |
848 | */ |
849 | QByteArray QTextCodec::fromUnicode(QStringView str) const |
850 | { |
851 | return convertFromUnicode(in: str.data(), length: str.length(), state: nullptr); |
852 | } |
853 | |
854 | /*! |
855 | \fn QString QTextCodec::toUnicode(const char *input, int size, |
856 | ConverterState *state) const |
857 | |
858 | Converts the first \a size characters from the \a input from the |
859 | encoding of this codec to Unicode, and returns the result in a |
860 | QString. |
861 | |
    The \a state of the converter used is updated.
863 | */ |
864 | |
865 | /*! |
866 | Converts \a a from the encoding of this codec to Unicode, and |
867 | returns the result in a QString. |
868 | */ |
869 | QString QTextCodec::toUnicode(const QByteArray& a) const |
870 | { |
871 | return convertToUnicode(in: a.constData(), length: a.length(), state: nullptr); |
872 | } |
873 | |
874 | /*! |
875 | Returns \c true if the Unicode character \a ch can be fully encoded |
876 | with this codec; otherwise returns \c false. |
877 | */ |
878 | bool QTextCodec::canEncode(QChar ch) const |
879 | { |
880 | ConverterState state; |
881 | state.flags = ConvertInvalidToNull; |
882 | convertFromUnicode(in: &ch, length: 1, state: &state); |
883 | return (state.invalidChars == 0); |
884 | } |
885 | |
886 | #if QT_STRINGVIEW_LEVEL < 2 |
887 | /*! |
888 | \overload |
889 | |
890 | \a s contains the string being tested for encode-ability. |
891 | */ |
892 | bool QTextCodec::canEncode(const QString& s) const |
893 | { |
894 | ConverterState state; |
895 | state.flags = ConvertInvalidToNull; |
896 | convertFromUnicode(in: s.constData(), length: s.length(), state: &state); |
897 | return (state.invalidChars == 0); |
898 | } |
899 | #endif |
900 | |
901 | /*! |
902 | \overload |
903 | \since 5.10 |
904 | |
905 | Returns \c true if the Unicode string \a s can be fully encoded |
906 | with this codec; otherwise returns \c false. |
907 | */ |
908 | bool QTextCodec::canEncode(QStringView s) const |
909 | { |
910 | ConverterState state; |
911 | state.flags = ConvertInvalidToNull; |
912 | convertFromUnicode(in: s.data(), length: s.length(), state: &state); |
913 | return !state.invalidChars; |
914 | } |
915 | /*! |
916 | \overload |
917 | |
918 | \a chars contains the source characters. |
919 | */ |
920 | QString QTextCodec::toUnicode(const char *chars) const |
921 | { |
922 | int len = qstrlen(str: chars); |
923 | return convertToUnicode(in: chars, length: len, state: nullptr); |
924 | } |
925 | |
926 | |
927 | /*! |
928 | \class QTextEncoder |
929 | \inmodule QtCore |
930 | \brief The QTextEncoder class provides a state-based encoder. |
931 | \reentrant |
932 | \ingroup i18n |
933 | |
934 | A text encoder converts text from Unicode into an encoded text format |
935 | using a specific codec. |
936 | |
937 | The encoder converts Unicode into another format, remembering any |
938 | state that is required between calls. |
939 | |
940 | \sa QTextCodec::makeEncoder(), QTextDecoder |
941 | */ |
942 | |
943 | /*! |
944 | \fn QTextEncoder::QTextEncoder(const QTextCodec *codec) |
945 | |
946 | Constructs a text encoder for the given \a codec. |
947 | */ |
948 | |
949 | /*! |
950 | Constructs a text encoder for the given \a codec and conversion \a flags. |
951 | |
952 | \since 4.7 |
953 | */ |
954 | QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags) |
955 | : c(codec), state() |
956 | { |
957 | state.flags = flags; |
958 | } |
959 | |
960 | /*! |
961 | Destroys the encoder. |
962 | */ |
963 | QTextEncoder::~QTextEncoder() |
964 | { |
965 | } |
966 | |
967 | /*! |
968 | \internal |
969 | \since 4.5 |
970 | Determines whether the eecoder encountered a failure while decoding the input. If |
971 | an error was encountered, the produced result is undefined, and gets converted as according |
972 | to the conversion flags. |
973 | */ |
974 | bool QTextEncoder::hasFailure() const |
975 | { |
976 | return state.invalidChars != 0; |
977 | } |
978 | |
979 | #if QT_STRINGVIEW_LEVEL < 2 |
980 | /*! |
981 | Converts the Unicode string \a str into an encoded QByteArray. |
982 | */ |
983 | QByteArray QTextEncoder::fromUnicode(const QString& str) |
984 | { |
985 | QByteArray result = c->fromUnicode(in: str.constData(), length: str.length(), state: &state); |
986 | return result; |
987 | } |
988 | #endif |
989 | |
990 | /*! |
991 | \overload |
992 | \since 5.10 |
993 | Converts the Unicode string \a str into an encoded QByteArray. |
994 | */ |
995 | QByteArray QTextEncoder::fromUnicode(QStringView str) |
996 | { |
997 | return c->fromUnicode(in: str.data(), length: str.length(), state: &state); |
998 | } |
999 | |
1000 | /*! |
1001 | \overload |
1002 | |
1003 | Converts \a len characters (not bytes) from \a uc, and returns the |
1004 | result in a QByteArray. |
1005 | */ |
1006 | QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len) |
1007 | { |
1008 | QByteArray result = c->fromUnicode(in: uc, length: len, state: &state); |
1009 | return result; |
1010 | } |
1011 | |
1012 | /*! |
1013 | \class QTextDecoder |
1014 | \inmodule QtCore |
1015 | \brief The QTextDecoder class provides a state-based decoder. |
1016 | \reentrant |
1017 | \ingroup i18n |
1018 | |
1019 | A text decoder converts text from an encoded text format into Unicode |
1020 | using a specific codec. |
1021 | |
1022 | The decoder converts text in this format into Unicode, remembering any |
1023 | state that is required between calls. |
1024 | |
1025 | \sa QTextCodec::makeDecoder(), QTextEncoder |
1026 | */ |
1027 | |
1028 | /*! |
1029 | \fn QTextDecoder::QTextDecoder(const QTextCodec *codec) |
1030 | |
1031 | Constructs a text decoder for the given \a codec. |
1032 | */ |
1033 | |
1034 | /*! |
1035 | Constructs a text decoder for the given \a codec and conversion \a flags. |
1036 | |
1037 | \since 4.7 |
1038 | */ |
1039 | |
1040 | QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags) |
1041 | : c(codec), state() |
1042 | { |
1043 | state.flags = flags; |
1044 | } |
1045 | |
1046 | /*! |
1047 | Destroys the decoder. |
1048 | */ |
1049 | QTextDecoder::~QTextDecoder() |
1050 | { |
1051 | } |
1052 | |
1053 | /*! |
1054 | \fn QString QTextDecoder::toUnicode(const char *chars, int len) |
1055 | |
1056 | Converts the first \a len bytes in \a chars to Unicode, returning |
1057 | the result. |
1058 | |
1059 | If not all characters are used (e.g. if only part of a multi-byte |
1060 | encoding is at the end of the characters), the decoder remembers |
1061 | enough state to continue with the next call to this function. |
1062 | */ |
1063 | QString QTextDecoder::toUnicode(const char *chars, int len) |
1064 | { |
1065 | return c->toUnicode(in: chars, length: len, state: &state); |
1066 | } |
1067 | |
1068 | // in qstring.cpp: |
1069 | void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept; |
1070 | |
1071 | /*! \overload |
1072 | |
1073 | The converted string is returned in \a target. |
1074 | */ |
1075 | void QTextDecoder::toUnicode(QString *target, const char *chars, int len) |
1076 | { |
1077 | Q_ASSERT(target); |
1078 | switch (c->mibEnum()) { |
1079 | case 106: // utf8 |
1080 | static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state); |
1081 | break; |
1082 | case 4: // latin1 |
1083 | target->resize(size: len); |
1084 | qt_from_latin1(dst: (ushort*)target->data(), str: chars, size: len); |
1085 | break; |
1086 | default: |
1087 | *target = c->toUnicode(in: chars, length: len, state: &state); |
1088 | } |
1089 | } |
1090 | |
1091 | |
1092 | /*! |
1093 | \overload |
1094 | |
1095 | Converts the bytes in the byte array specified by \a ba to Unicode |
1096 | and returns the result. |
1097 | */ |
1098 | QString QTextDecoder::toUnicode(const QByteArray &ba) |
1099 | { |
1100 | return c->toUnicode(in: ba.constData(), length: ba.length(), state: &state); |
1101 | } |
1102 | |
1103 | /*! |
1104 | \since 4.4 |
1105 | |
1106 | Tries to detect the encoding of the provided snippet of HTML in |
1107 | the given byte array, \a ba, by checking the BOM (Byte Order Mark) |
1108 | and the content-type meta header and returns a QTextCodec instance |
1109 | that is capable of decoding the html to unicode. If the codec |
1110 | cannot be detected from the content provided, \a defaultCodec is |
1111 | returned. |
1112 | |
1113 | \sa codecForUtfText() |
1114 | */ |
1115 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec) |
1116 | { |
1117 | // determine charset |
1118 | QTextCodec *c = QTextCodec::codecForUtfText(ba, defaultCodec: nullptr); |
1119 | if (!c) { |
1120 | static Q_RELAXED_CONSTEXPR auto matcher = qMakeStaticByteArrayMatcher(pattern: "meta " ); |
1121 | QByteArray = ba.left(len: 1024).toLower(); |
1122 | int pos = matcher.indexIn(haystack: header); |
1123 | if (pos != -1) { |
1124 | static Q_RELAXED_CONSTEXPR auto matcher = qMakeStaticByteArrayMatcher(pattern: "charset=" ); |
1125 | pos = matcher.indexIn(haystack: header, from: pos); |
1126 | if (pos != -1) { |
1127 | pos += qstrlen(str: "charset=" ); |
1128 | |
1129 | int pos2 = pos; |
1130 | // The attribute can be closed with either """, "'", ">" or "/", |
1131 | // none of which are valid charset characters. |
1132 | while (++pos2 < header.size()) { |
1133 | char ch = header.at(i: pos2); |
1134 | if (ch == '\"' || ch == '\'' || ch == '>') { |
1135 | QByteArray name = header.mid(index: pos, len: pos2 - pos); |
1136 | if (name == "unicode" ) // QTBUG-41998, ICU will return UTF-16. |
1137 | name = QByteArrayLiteral("UTF-8" ); |
1138 | c = QTextCodec::codecForName(name); |
1139 | return c ? c : defaultCodec; |
1140 | } |
1141 | } |
1142 | } |
1143 | } |
1144 | } |
1145 | if (!c) |
1146 | c = defaultCodec; |
1147 | |
1148 | return c; |
1149 | } |
1150 | |
1151 | /*! |
1152 | \overload |
1153 | |
1154 | Tries to detect the encoding of the provided snippet of HTML in |
1155 | the given byte array, \a ba, by checking the BOM (Byte Order Mark) |
1156 | and the content-type meta header and returns a QTextCodec instance |
1157 | that is capable of decoding the html to unicode. If the codec cannot |
1158 | be detected, this overload returns a Latin-1 QTextCodec. |
1159 | */ |
1160 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) |
1161 | { |
1162 | return codecForHtml(ba, defaultCodec: QTextCodec::codecForName(name: "ISO-8859-1" )); |
1163 | } |
1164 | |
1165 | /*! |
1166 | \since 4.6 |
1167 | |
1168 | Tries to detect the encoding of the provided snippet \a ba by |
1169 | using the BOM (Byte Order Mark) and returns a QTextCodec instance |
1170 | that is capable of decoding the text to unicode. This function can |
1171 | detect one of the following codecs: |
1172 | |
1173 | \list |
1174 | \li UTF-32 Little Endian |
1175 | \li UTF-32 Big Endian |
1176 | \li UTF-16 Little Endian |
1177 | \li UTF-16 Big Endian |
1178 | \li UTF-8 |
1179 | \endlist |
1180 | |
1181 | If the codec cannot be detected from the content provided, \a defaultCodec |
1182 | is returned. |
1183 | |
1184 | \sa codecForHtml() |
1185 | */ |
1186 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec) |
1187 | { |
1188 | const int arraySize = ba.size(); |
1189 | const uchar *buf = reinterpret_cast<const uchar *>(ba.constData()); |
1190 | const uint bom = 0xfeff; |
1191 | |
1192 | if (arraySize > 3) { |
1193 | uint uc = qFromUnaligned<uint>(src: buf); |
1194 | if (uc == qToBigEndian(source: bom)) |
1195 | return QTextCodec::codecForMib(mib: 1018); // utf-32 be |
1196 | else if (uc == qToLittleEndian(source: bom)) |
1197 | return QTextCodec::codecForMib(mib: 1019); // utf-32 le |
1198 | } |
1199 | |
1200 | if (arraySize < 2) |
1201 | return defaultCodec; |
1202 | |
1203 | ushort uc = qFromUnaligned<ushort>(src: buf); |
1204 | if (uc == qToBigEndian(source: ushort(bom))) |
1205 | return QTextCodec::codecForMib(mib: 1013); // utf16 be |
1206 | else if (uc == qToLittleEndian(source: ushort(bom))) |
1207 | return QTextCodec::codecForMib(mib: 1014); // utf16 le |
1208 | |
1209 | if (arraySize < 3) |
1210 | return defaultCodec; |
1211 | |
1212 | static const char utf8bom[] = "\xef\xbb\xbf" ; |
1213 | if (memcmp(s1: buf, s2: utf8bom, n: sizeof(utf8bom) - 1) == 0) |
1214 | return QTextCodec::codecForMib(mib: 106); // utf-8 |
1215 | |
1216 | return defaultCodec; |
1217 | } |
1218 | |
1219 | /*! |
1220 | \overload |
1221 | |
1222 | Tries to detect the encoding of the provided snippet \a ba by |
1223 | using the BOM (Byte Order Mark) and returns a QTextCodec instance |
1224 | that is capable of decoding the text to unicode. This function can |
1225 | detect one of the following codecs: |
1226 | |
1227 | \list |
1228 | \li UTF-32 Little Endian |
1229 | \li UTF-32 Big Endian |
1230 | \li UTF-16 Little Endian |
1231 | \li UTF-16 Big Endian |
1232 | \li UTF-8 |
1233 | \endlist |
1234 | |
1235 | If the codec cannot be detected from the content provided, this overload |
1236 | returns a Latin-1 QTextCodec. |
1237 | |
1238 | \sa codecForHtml() |
1239 | */ |
1240 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba) |
1241 | { |
1242 | return codecForUtfText(ba, defaultCodec: QTextCodec::codecForMib(/*Latin 1*/ mib: 4)); |
1243 | } |
1244 | |
1245 | /*! |
1246 | \fn QTextCodec * QTextCodec::codecForTr () |
1247 | \obsolete |
1248 | |
1249 | Returns the codec used by QObject::tr() on its argument. If this |
1250 | function returns \nullptr (the default), tr() assumes Latin-1. |
1251 | */ |
1252 | |
1253 | /*! |
1254 | \internal |
1255 | \since 4.3 |
1256 | Determines whether the decoder encountered a failure while decoding the |
1257 | input. If an error was encountered, the produced result is undefined, and |
1258 | gets converted as according to the conversion flags. |
1259 | */ |
1260 | bool QTextDecoder::hasFailure() const |
1261 | { |
1262 | return state.invalidChars != 0; |
1263 | } |
1264 | |
1265 | /*! |
1266 | \internal |
1267 | \since 5.12 |
1268 | |
1269 | Determines whether the decoder needs more bytes to continue decoding. That |
1270 | is, this signifies that the input string ended in the middle of a |
1271 | multi-byte sequence. Note that it's possible some codecs do not report this. |
1272 | */ |
1273 | bool QTextDecoder::needsMoreData() const |
1274 | { |
1275 | return state.remainingChars; |
1276 | } |
1277 | |
1278 | QT_END_NAMESPACE |
1279 | |