1 | /* |
2 | SPDX-FileCopyrightText: 2000-2001 Dawit Alemayehu <adawit@kde.org> |
3 | SPDX-FileCopyrightText: 2001 Rik Hemsley (rikkus) <rik@kde.org> |
4 | SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org> |
5 | |
6 | SPDX-License-Identifier: LGPL-2.0-only |
7 | |
8 | The encoding and decoding utilities in KCodecs with the exception of |
9 | quoted-printable are based on the java implementation in HTTPClient |
10 | package by Ronald Tschalär Copyright (C) 1996-1999. // krazy:exclude=copyright |
11 | |
12 | The quoted-printable codec as described in RFC 2045, section 6.7. is by |
13 | Rik Hemsley (C) 2001. |
14 | */ |
15 | |
16 | #include "kcodecs.h" |
17 | #include "kcharsets.h" |
18 | #include "kcharsets_p.h" |
19 | #include "kcodecs_debug.h" |
20 | #include "kcodecs_p.h" |
21 | #include "kcodecsbase64.h" |
22 | #include "kcodecsqp.h" |
23 | #include "kcodecsuuencode.h" |
24 | |
25 | #include <array> |
26 | #include <cassert> |
27 | #include <cstring> |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <string.h> |
31 | |
32 | #include <QDebug> |
33 | #include <QStringDecoder> |
34 | #include <QStringEncoder> |
35 | |
36 | #if defined(Q_OS_WIN) |
37 | #define strncasecmp _strnicmp |
38 | #endif |
39 | |
40 | namespace KCodecs |
41 | { |
42 | static QList<QByteArray> charsetCache; |
43 | |
44 | QByteArray cachedCharset(const QByteArray &name) |
45 | { |
46 | auto it = std::find_if(first: charsetCache.cbegin(), last: charsetCache.cend(), pred: [&name](const QByteArray &charset) { |
47 | return qstricmp(name.data(), charset.data()) == 0; |
48 | }); |
49 | if (it != charsetCache.cend()) { |
50 | return *it; |
51 | } |
52 | |
53 | charsetCache.append(t: name.toUpper()); |
54 | return charsetCache.last(); |
55 | } |
56 | |
57 | namespace CodecNames |
58 | { |
59 | QByteArray utf8() |
60 | { |
61 | return QByteArrayLiteral("UTF-8" ); |
62 | } |
63 | } |
64 | |
65 | Q_REQUIRED_RESULT |
66 | QByteArray updateEncodingCharset(const QByteArray ¤tCharset, const QByteArray &nextCharset) |
67 | { |
68 | if (!nextCharset.isEmpty()) { |
69 | if (currentCharset.isEmpty()) { |
70 | return nextCharset; |
71 | } |
72 | if (currentCharset != nextCharset) { |
73 | // only one charset per string supported, so change to superset charset UTF-8, |
74 | // which should cover any possible chars |
75 | return CodecNames::utf8(); |
76 | } |
77 | } |
78 | return currentCharset; |
79 | } |
80 | |
81 | } // namespace KCodecs |
82 | |
83 | /******************************** KCodecs ********************************/ |
84 | |
85 | QByteArray KCodecs::quotedPrintableEncode(QByteArrayView in, bool useCRLF) |
86 | { |
87 | Codec *codec = Codec::codecForName(name: "quoted-printable" ); |
88 | return codec->encode(src: in, newline: useCRLF ? Codec::NewlineCRLF : Codec::NewlineLF); |
89 | } |
90 | |
91 | void KCodecs::quotedPrintableEncode(QByteArrayView in, QByteArray &out, bool useCRLF) |
92 | { |
93 | out = quotedPrintableEncode(in, useCRLF: useCRLF ? Codec::NewlineCRLF : Codec::NewlineLF); |
94 | } |
95 | |
96 | QByteArray KCodecs::quotedPrintableDecode(QByteArrayView in) |
97 | { |
98 | Codec *codec = Codec::codecForName(name: "quoted-printable" ); |
99 | return codec->decode(src: in); |
100 | } |
101 | |
102 | void KCodecs::quotedPrintableDecode(QByteArrayView in, QByteArray &out) |
103 | { |
104 | out = quotedPrintableDecode(in); |
105 | } |
106 | |
107 | QByteArray KCodecs::base64Encode(QByteArrayView in) |
108 | { |
109 | Codec *codec = Codec::codecForName(name: "base64" ); |
110 | return codec->encode(src: in); |
111 | } |
112 | |
113 | void KCodecs::base64Encode(QByteArrayView in, QByteArray &out, bool insertLFs) |
114 | { |
115 | Q_UNUSED(insertLFs); |
116 | out = base64Encode(in); |
117 | } |
118 | |
119 | QByteArray KCodecs::base64Decode(QByteArrayView in) |
120 | { |
121 | Codec *codec = Codec::codecForName(name: "base64" ); |
122 | return codec->decode(src: in); |
123 | } |
124 | |
125 | void KCodecs::base64Decode(const QByteArrayView in, QByteArray &out) |
126 | { |
127 | out = base64Decode(in); |
128 | } |
129 | |
130 | QByteArray KCodecs::uudecode(QByteArrayView in) |
131 | { |
132 | Codec *codec = Codec::codecForName(name: "x-uuencode" ); |
133 | return codec->decode(src: in); |
134 | } |
135 | |
136 | void KCodecs::uudecode(QByteArrayView in, QByteArray &out) |
137 | { |
138 | out = uudecode(in); |
139 | } |
140 | |
141 | //@cond PRIVATE |
142 | |
143 | namespace KCodecs |
144 | { |
145 | // parse the encoded-word (scursor points to after the initial '=') |
146 | bool parseEncodedWord(const char *&scursor, |
147 | const char *const send, |
148 | QString *result, |
149 | QByteArray *language, |
150 | QByteArray *usedCS, |
151 | const QByteArray &defaultCS, |
152 | CharsetOption charsetOption) |
153 | { |
154 | assert(result); |
155 | assert(language); |
156 | |
157 | // make sure the caller already did a bit of the work. |
158 | assert(*(scursor - 1) == '='); |
159 | |
160 | // |
161 | // STEP 1: |
162 | // scan for the charset/language portion of the encoded-word |
163 | // |
164 | |
165 | char ch = *scursor++; |
166 | |
167 | if (ch != '?') { |
168 | // qCDebug(KCODECS_LOG) << "first"; |
169 | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
170 | return false; |
171 | } |
172 | |
173 | // remember start of charset (i.e. just after the initial "=?") and |
174 | // language (just after the first '*') fields: |
175 | const char *charsetStart = scursor; |
176 | const char *languageStart = nullptr; |
177 | |
178 | // find delimiting '?' (and the '*' separating charset and language |
179 | // tags, if any): |
180 | for (; scursor != send; scursor++) { |
181 | if (*scursor == '?') { |
182 | break; |
183 | } else if (*scursor == '*' && languageStart == nullptr) { |
184 | languageStart = scursor + 1; |
185 | } |
186 | } |
187 | |
188 | // not found? can't be an encoded-word! |
189 | if (scursor == send || *scursor != '?') { |
190 | // qCDebug(KCODECS_LOG) << "second"; |
191 | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
192 | return false; |
193 | } |
194 | |
195 | // extract the language information, if any (if languageStart is 0, |
196 | // language will be null, too): |
197 | QByteArray maybeLanguage(languageStart, scursor - languageStart); |
198 | // extract charset information (keep in mind: the size given to the |
199 | // ctor is one off due to the \0 terminator): |
200 | QByteArray maybeCharset(charsetStart, (languageStart ? languageStart - 1 : scursor) - charsetStart); |
201 | |
202 | // |
203 | // STEP 2: |
204 | // scan for the encoding portion of the encoded-word |
205 | // |
206 | |
207 | // remember start of encoding (just _after_ the second '?'): |
208 | scursor++; |
209 | const char *encodingStart = scursor; |
210 | |
211 | // find next '?' (ending the encoding tag): |
212 | for (; scursor != send; scursor++) { |
213 | if (*scursor == '?') { |
214 | break; |
215 | } |
216 | } |
217 | |
218 | // not found? Can't be an encoded-word! |
219 | if (scursor == send || *scursor != '?') { |
220 | // qCDebug(KCODECS_LOG) << "third"; |
221 | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
222 | return false; |
223 | } |
224 | |
225 | // extract the encoding information: |
226 | QByteArray maybeEncoding(encodingStart, scursor - encodingStart); |
227 | |
228 | // qCDebug(KCODECS_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset |
229 | // << "\"; language == \"" << maybeLanguage |
230 | // << "\"; encoding == \"" << maybeEncoding << "\""; |
231 | |
232 | // |
233 | // STEP 3: |
234 | // scan for encoded-text portion of encoded-word |
235 | // |
236 | |
237 | // remember start of encoded-text (just after the third '?'): |
238 | scursor++; |
239 | const char *encodedTextStart = scursor; |
240 | |
241 | // find the '?=' sequence (ending the encoded-text): |
242 | for (; scursor != send; scursor++) { |
243 | if (*scursor == '?') { |
244 | if (scursor + 1 != send) { |
245 | if (*(scursor + 1) != '=') { // We expect a '=' after the '?', but we got something else; ignore |
246 | // qCDebug(KCODECS_LOG) << "Stray '?' in q-encoded word, ignoring this."; |
247 | continue; |
248 | } else { // yep, found a '?=' sequence |
249 | scursor += 2; |
250 | break; |
251 | } |
252 | } else { // The '?' is the last char, but we need a '=' after it! |
253 | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
254 | return false; |
255 | } |
256 | } |
257 | } |
258 | |
259 | if (*(scursor - 2) != '?' || *(scursor - 1) != '=' || scursor < encodedTextStart + 2) { |
260 | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
261 | return false; |
262 | } |
263 | |
264 | // set end sentinel for encoded-text: |
265 | const char *const encodedTextEnd = scursor - 2; |
266 | |
267 | // |
268 | // STEP 4: |
269 | // setup decoders for the transfer encoding and the charset |
270 | // |
271 | |
272 | // try if there's a codec for the encoding found: |
273 | Codec *codec = Codec::codecForName(name: maybeEncoding); |
274 | if (!codec) { |
275 | // qCDebug(KCODECS_LOG) << "Unknown encoding" << maybeEncoding; |
276 | return false; |
277 | } |
278 | |
279 | // get an instance of a corresponding decoder: |
280 | Decoder *dec = codec->makeDecoder(); |
281 | assert(dec); |
282 | |
283 | // try if there's a (text)codec for the charset found: |
284 | QByteArray cs; |
285 | QStringDecoder textCodec; |
286 | if (charsetOption == KCodecs::ForceDefaultCharset || maybeCharset.isEmpty()) { |
287 | textCodec = QStringDecoder(defaultCS.constData()); |
288 | cs = cachedCharset(name: defaultCS); |
289 | } else { |
290 | textCodec = QStringDecoder(maybeCharset.constData()); |
291 | if (!textCodec.isValid()) { // no suitable codec found => use default charset |
292 | textCodec = QStringDecoder(defaultCS.constData()); |
293 | cs = cachedCharset(name: defaultCS); |
294 | } else { |
295 | cs = cachedCharset(name: maybeCharset); |
296 | } |
297 | } |
298 | if (usedCS) { |
299 | *usedCS = updateEncodingCharset(currentCharset: *usedCS, nextCharset: cs); |
300 | } |
301 | |
302 | if (!textCodec.isValid()) { |
303 | // qCDebug(KCODECS_LOG) << "Unknown charset" << maybeCharset; |
304 | delete dec; |
305 | return false; |
306 | }; |
307 | |
308 | // qCDebug(KCODECS_LOG) << "mimeName(): \"" << textCodec->name() << "\""; |
309 | |
310 | // allocate a temporary buffer to store the 8bit text: |
311 | int encodedTextLength = encodedTextEnd - encodedTextStart; |
312 | QByteArray buffer; |
313 | buffer.resize(size: codec->maxDecodedSizeFor(insize: encodedTextLength)); |
314 | char *bbegin = buffer.data(); |
315 | char *bend = bbegin + buffer.length(); |
316 | |
317 | // |
318 | // STEP 5: |
319 | // do the actual decoding |
320 | // |
321 | |
322 | if (!dec->decode(scursor&: encodedTextStart, send: encodedTextEnd, dcursor&: bbegin, dend: bend)) { |
323 | qWarning() << codec->name() << "codec lies about its maxDecodedSizeFor(" << encodedTextLength << ")\nresult may be truncated" ; |
324 | } |
325 | |
326 | *result = textCodec.decode(ba: QByteArrayView(buffer.data(), bbegin - buffer.data())); |
327 | |
328 | // qCDebug(KCODECS_LOG) << "result now: \"" << result << "\""; |
329 | // cleanup: |
330 | delete dec; |
331 | *language = maybeLanguage; |
332 | |
333 | return true; |
334 | } |
335 | |
336 | } // namespace KCodecs |
337 | |
338 | //@endcond |
339 | |
340 | QString KCodecs::decodeRFC2047String(QStringView msg) |
341 | { |
342 | QByteArray usedCS; |
343 | return decodeRFC2047String(src: msg.toUtf8(), usedCS: &usedCS, defaultCS: CodecNames::utf8(), option: NoOption); |
344 | } |
345 | |
346 | QString KCodecs::decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS, CharsetOption charsetOption) |
347 | { |
348 | QByteArray result; |
349 | QByteArray spaceBuffer; |
350 | const char *scursor = src.constData(); |
351 | const char *send = scursor + src.length(); |
352 | bool onlySpacesSinceLastWord = false; |
353 | if (usedCS) { |
354 | usedCS->clear(); |
355 | } |
356 | |
357 | while (scursor != send) { |
358 | // space |
359 | if (isspace(*scursor) && onlySpacesSinceLastWord) { |
360 | spaceBuffer += *scursor++; |
361 | continue; |
362 | } |
363 | |
364 | // possible start of an encoded word |
365 | if (*scursor == '=') { |
366 | QByteArray language; |
367 | QString decoded; |
368 | ++scursor; |
369 | const char *start = scursor; |
370 | if (parseEncodedWord(scursor, send, result: &decoded, language: &language, usedCS, defaultCS, charsetOption)) { |
371 | result += decoded.toUtf8(); |
372 | onlySpacesSinceLastWord = true; |
373 | spaceBuffer.clear(); |
374 | } else { |
375 | if (onlySpacesSinceLastWord) { |
376 | result += spaceBuffer; |
377 | onlySpacesSinceLastWord = false; |
378 | } |
379 | result += '='; |
380 | scursor = start; // reset cursor after parsing failure |
381 | } |
382 | continue; |
383 | } else { |
384 | // unencoded data |
385 | if (onlySpacesSinceLastWord) { |
386 | result += spaceBuffer; |
387 | onlySpacesSinceLastWord = false; |
388 | } |
389 | result += *scursor; |
390 | ++scursor; |
391 | } |
392 | } |
393 | // If there are any chars that couldn't be decoded in UTF-8, |
394 | // fallback to local codec |
395 | const QString tryUtf8 = QString::fromUtf8(ba: result); |
396 | if (tryUtf8.contains(c: QChar(0xFFFD))) { |
397 | QStringDecoder codec(QStringDecoder::System); |
398 | if (usedCS) { |
399 | *usedCS = updateEncodingCharset(currentCharset: *usedCS, nextCharset: cachedCharset(name: codec.name())); |
400 | } |
401 | return codec.decode(ba: result); |
402 | } else { |
403 | return tryUtf8; |
404 | } |
405 | } |
406 | |
407 | QByteArray KCodecs::encodeRFC2047String(QStringView src, const QByteArray &charset) |
408 | { |
409 | QByteArray result; |
410 | int start = 0; |
411 | int end = 0; |
412 | bool nonAscii = false; |
413 | bool useQEncoding = false; |
414 | |
415 | QStringEncoder codec(charset.constData()); |
416 | |
417 | QByteArray usedCS; |
418 | if (!codec.isValid()) { |
419 | // no codec available => try local8Bit and hope the best ;-) |
420 | codec = QStringEncoder(QStringEncoder::System); |
421 | usedCS = codec.name(); |
422 | } else { |
423 | Q_ASSERT(codec.isValid()); |
424 | if (charset.isEmpty()) { |
425 | usedCS = codec.name(); |
426 | } else { |
427 | usedCS = charset; |
428 | } |
429 | } |
430 | |
431 | QByteArray encoded8Bit = codec.encode(in: src); |
432 | if (codec.hasError()) { |
433 | usedCS = CodecNames::utf8(); |
434 | codec = QStringEncoder(QStringEncoder::Utf8); |
435 | encoded8Bit = codec.encode(in: src); |
436 | } |
437 | |
438 | if (usedCS.contains(bv: "8859-" )) { // use "B"-Encoding for non iso-8859-x charsets |
439 | useQEncoding = true; |
440 | } |
441 | |
442 | uint encoded8BitLength = encoded8Bit.length(); |
443 | for (unsigned int i = 0; i < encoded8BitLength; i++) { |
444 | if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries |
445 | start = i + 1; |
446 | } |
447 | |
448 | // encode escape character, for japanese encodings... |
449 | if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033')) { |
450 | end = start; // non us-ascii char found, now we determine where to stop encoding |
451 | nonAscii = true; |
452 | break; |
453 | } |
454 | } |
455 | |
456 | if (nonAscii) { |
457 | while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { |
458 | // we encode complete words |
459 | end++; |
460 | } |
461 | |
462 | for (int x = end; x < encoded8Bit.length(); x++) { |
463 | if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033')) { |
464 | end = x; // we found another non-ascii word |
465 | |
466 | while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { |
467 | // we encode complete words |
468 | end++; |
469 | } |
470 | } |
471 | } |
472 | |
473 | result = encoded8Bit.left(len: start) + "=?" + usedCS; |
474 | |
475 | if (useQEncoding) { |
476 | result += "?Q?" ; |
477 | |
478 | char hexcode; // "Q"-encoding implementation described in RFC 2047 |
479 | for (int i = start; i < end; i++) { |
480 | const char c = encoded8Bit[i]; |
481 | if (c == ' ') { // make the result readable with not MIME-capable readers |
482 | result += '_'; |
483 | } else { |
484 | if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems |
485 | ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers |
486 | ((c >= '0') && (c <= '9'))) { |
487 | result += c; |
488 | } else { |
489 | result += '='; // "stolen" from KMail ;-) |
490 | hexcode = ((c & 0xF0) >> 4) + 48; |
491 | if (hexcode >= 58) { |
492 | hexcode += 7; |
493 | } |
494 | result += hexcode; |
495 | hexcode = (c & 0x0F) + 48; |
496 | if (hexcode >= 58) { |
497 | hexcode += 7; |
498 | } |
499 | result += hexcode; |
500 | } |
501 | } |
502 | } |
503 | } else { |
504 | result += "?B?" + encoded8Bit.mid(index: start, len: end - start).toBase64(); |
505 | } |
506 | |
507 | result += "?=" ; |
508 | result += encoded8Bit.right(len: encoded8Bit.length() - end); |
509 | } else { |
510 | result = encoded8Bit; |
511 | } |
512 | |
513 | return result; |
514 | } |
515 | |
516 | /******************************************************************************/ |
517 | /* KCodecs::Codec */ |
518 | |
519 | KCodecs::Codec *KCodecs::Codec::codecForName(QByteArrayView name) |
520 | { |
521 | struct CodecEntry { |
522 | const char *name; |
523 | std::unique_ptr<KCodecs::Codec> codec; |
524 | }; |
525 | // ### has to be sorted by name! |
526 | static const std::array<CodecEntry, 6> s_codecs{._M_elems: { |
527 | {.name: "b" , .codec: std::make_unique<KCodecs::Rfc2047BEncodingCodec>()}, |
528 | {.name: "base64" , .codec: std::make_unique<KCodecs::Base64Codec>()}, |
529 | {.name: "q" , .codec: std::make_unique<KCodecs::Rfc2047QEncodingCodec>()}, |
530 | {.name: "quoted-printable" , .codec: std::make_unique<KCodecs::QuotedPrintableCodec>()}, |
531 | {.name: "x-kmime-rfc2231" , .codec: std::make_unique<KCodecs::Rfc2231EncodingCodec>()}, |
532 | {.name: "x-uuencode" , .codec: std::make_unique<KCodecs::UUCodec>()}, |
533 | }}; |
534 | |
535 | const auto it = std::lower_bound(first: s_codecs.begin(), last: s_codecs.end(), val: name, comp: [](const auto &lhs, auto rhs) { |
536 | return rhs.compare(lhs.name, Qt::CaseInsensitive) > 0; |
537 | }); |
538 | if (it == s_codecs.end() || name.compare(a: (*it).name, cs: Qt::CaseInsensitive) != 0) { |
539 | qWarning() << "Unknown codec \"" << name << "\" requested!" ; |
540 | } |
541 | return (*it).codec.get(); |
542 | } |
543 | |
544 | bool KCodecs::Codec::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline) const |
545 | { |
546 | // get an encoder: |
547 | std::unique_ptr<Encoder> enc(makeEncoder(newline)); |
548 | if (!enc) { |
549 | qWarning() << "makeEncoder failed for" << name(); |
550 | return false; |
551 | } |
552 | |
553 | // encode and check for output buffer overflow: |
554 | while (!enc->encode(scursor, send, dcursor, dend)) { |
555 | if (dcursor == dend) { |
556 | return false; // not enough space in output buffer |
557 | } |
558 | } |
559 | |
560 | // finish and check for output buffer overflow: |
561 | while (!enc->finish(dcursor, dend)) { |
562 | if (dcursor == dend) { |
563 | return false; // not enough space in output buffer |
564 | } |
565 | } |
566 | |
567 | return true; // successfully encoded. |
568 | } |
569 | |
570 | QByteArray KCodecs::Codec::encode(QByteArrayView src, NewlineType newline) const |
571 | { |
572 | // allocate buffer for the worst case: |
573 | QByteArray result; |
574 | result.resize(size: maxEncodedSizeFor(insize: src.size(), newline)); |
575 | |
576 | // set up iterators: |
577 | QByteArray::ConstIterator iit = src.begin(); |
578 | QByteArray::ConstIterator iend = src.end(); |
579 | QByteArray::Iterator oit = result.begin(); |
580 | QByteArray::ConstIterator oend = result.end(); |
581 | |
582 | // encode |
583 | if (!encode(scursor&: iit, send: iend, dcursor&: oit, dend: oend, newline)) { |
584 | qCritical() << name() << "codec lies about it's mEncodedSizeFor()" ; |
585 | } |
586 | |
587 | // shrink result to actual size: |
588 | result.truncate(pos: oit - result.begin()); |
589 | |
590 | return result; |
591 | } |
592 | |
593 | QByteArray KCodecs::Codec::decode(QByteArrayView src, NewlineType newline) const |
594 | { |
595 | // allocate buffer for the worst case: |
596 | QByteArray result; |
597 | result.resize(size: maxDecodedSizeFor(insize: src.size(), newline)); |
598 | |
599 | // set up iterators: |
600 | QByteArray::ConstIterator iit = src.begin(); |
601 | QByteArray::ConstIterator iend = src.end(); |
602 | QByteArray::Iterator oit = result.begin(); |
603 | QByteArray::ConstIterator oend = result.end(); |
604 | |
605 | // decode |
606 | if (!decode(scursor&: iit, send: iend, dcursor&: oit, dend: oend, newline)) { |
607 | qCritical() << name() << "codec lies about it's maxDecodedSizeFor()" ; |
608 | } |
609 | |
610 | // shrink result to actual size: |
611 | result.truncate(pos: oit - result.begin()); |
612 | |
613 | return result; |
614 | } |
615 | |
616 | bool KCodecs::Codec::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline) const |
617 | { |
618 | // get a decoder: |
619 | std::unique_ptr<Decoder> dec(makeDecoder(newline)); |
620 | assert(dec); |
621 | |
622 | // decode and check for output buffer overflow: |
623 | while (!dec->decode(scursor, send, dcursor, dend)) { |
624 | if (dcursor == dend) { |
625 | return false; // not enough space in output buffer |
626 | } |
627 | } |
628 | |
629 | // finish and check for output buffer overflow: |
630 | while (!dec->finish(dcursor, dend)) { |
631 | if (dcursor == dend) { |
632 | return false; // not enough space in output buffer |
633 | } |
634 | } |
635 | |
636 | return true; // successfully encoded. |
637 | } |
638 | |
639 | /******************************************************************************/ |
640 | /* KCodecs::Encoder */ |
641 | |
642 | KCodecs::EncoderPrivate::EncoderPrivate(Codec::NewlineType newline) |
643 | : outputBufferCursor(0) |
644 | , newline(newline) |
645 | { |
646 | } |
647 | |
648 | KCodecs::Encoder::Encoder(Codec::NewlineType newline) |
649 | : d(new KCodecs::EncoderPrivate(newline)) |
650 | { |
651 | } |
652 | |
653 | KCodecs::Encoder::~Encoder() = default; |
654 | |
655 | bool KCodecs::Encoder::write(char ch, char *&dcursor, const char *const dend) |
656 | { |
657 | if (dcursor != dend) { |
658 | // if there's space in the output stream, write there: |
659 | *dcursor++ = ch; |
660 | return true; |
661 | } else { |
662 | // else buffer the output: |
663 | if (d->outputBufferCursor >= maxBufferedChars) { |
664 | qCritical() << "KCodecs::Encoder: internal buffer overflow!" ; |
665 | } else { |
666 | d->outputBuffer[d->outputBufferCursor++] = ch; |
667 | } |
668 | return false; |
669 | } |
670 | } |
671 | |
672 | // write as much as possible off the output buffer. Return true if |
673 | // flushing was complete, false if some chars could not be flushed. |
674 | bool KCodecs::Encoder::flushOutputBuffer(char *&dcursor, const char *const dend) |
675 | { |
676 | int i; |
677 | // copy output buffer to output stream: |
678 | for (i = 0; dcursor != dend && i < d->outputBufferCursor; ++i) { |
679 | *dcursor++ = d->outputBuffer[i]; |
680 | } |
681 | |
682 | // calculate the number of missing chars: |
683 | int numCharsLeft = d->outputBufferCursor - i; |
684 | // push the remaining chars to the beginning of the buffer: |
685 | if (numCharsLeft) { |
686 | ::memmove(dest: d->outputBuffer, src: d->outputBuffer + i, n: numCharsLeft); |
687 | } |
688 | // adjust cursor: |
689 | d->outputBufferCursor = numCharsLeft; |
690 | |
691 | return !numCharsLeft; |
692 | } |
693 | |
694 | bool KCodecs::Encoder::writeCRLF(char *&dcursor, const char *const dend) |
695 | { |
696 | if (d->newline == Codec::NewlineCRLF) { |
697 | write(ch: '\r', dcursor, dend); |
698 | } |
699 | return write(ch: '\n', dcursor, dend); |
700 | } |
701 | |
702 | /******************************************************************************/ |
703 | /* KCodecs::Decoder */ |
704 | |
705 | KCodecs::DecoderPrivate::DecoderPrivate(Codec::NewlineType newline) |
706 | : newline(newline) |
707 | { |
708 | } |
709 | |
710 | KCodecs::Decoder::Decoder(Codec::NewlineType newline) |
711 | : d(new KCodecs::DecoderPrivate(newline)) |
712 | { |
713 | } |
714 | |
715 | KCodecs::Decoder::~Decoder() = default; |
716 | |