1 | /* -*- c++ -*- |
2 | SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | /** |
7 | @file |
8 | This file is part of the API for handling @ref MIME data and |
9 | defines the @ref QuotedPrintable, @ref RFC2047Q, and |
10 | @ref RFC2231 @ref Codec classes. |
11 | |
12 | @brief |
13 | Defines the classes QuotedPrintableCodec, Rfc2047QEncodingCodec, and |
14 | Rfc2231EncodingCodec. |
15 | |
16 | @authors Marc Mutz \<mutz@kde.org\> |
17 | */ |
18 | |
19 | #include "kcodecsqp.h" |
20 | #include "kcodecs_p.h" |
21 | |
22 | #include <QDebug> |
23 | |
24 | #include <cassert> |
25 | |
26 | using namespace KCodecs; |
27 | |
28 | namespace KCodecs |
29 | { |
// Bitmap over the 128 US-ASCII code points, most significant bit first:
// character c is flagged iff (eTextMap[c / 8] & (0x80 >> (c % 8))) != 0.
// Flagged are none except a-zA-Z0-9!*+-/
const uchar eTextMap[16] = {
    0x00, 0x00, 0x00, 0x00, // 0x00..0x1F: nothing
    0x40, 0x35, 0xFF, 0xC0, // 0x20..0x3F: '!', '*' '+' '-' '/', '0'..'9'
    0x7F, 0xFF, 0xFF, 0xE0, // 0x40..0x5F: 'A'..'Z'
    0x7F, 0xFF, 0xFF, 0xE0, // 0x60..0x7F: 'a'..'z'
};
32 | |
33 | // some helpful functions: |
34 | |
/**
  Maps a 4-bit @p value onto its upper-case hexadecimal digit character:
  0..9 yield '0'..'9' and 10..15 yield 'A'..'F'. The argument is not
  range-checked, so values greater than 15 produce undesired results.
  @param value is an unsigned character containing the 4-bit input value.
  @return the hexadecimal digit character for @p value.
*/
static inline char binToHex(uchar value)
{
    const int digit = (value < 10) ? (value + '0') : (value + 'A' - 10);
    return static_cast<char>(digit);
}
49 | |
/**
  Extracts the upper four bits of @p ch and returns them shifted down
  into the lower four bits of the result (0..15).
  @param ch is an unsigned character containing the 8-bit input value.
*/
static inline uchar highNibble(uchar ch)
{
    return static_cast<uchar>((ch & 0xF0) >> 4);
}
58 | |
/**
  Extracts the lower four bits of @p ch (0..15); the upper four bits of
  the result are zero.
  @param ch is an unsigned character containing the 8-bit input value.
*/
static inline uchar lowNibble(uchar ch)
{
    return static_cast<uchar>(ch % 16);
}
67 | |
/**
  Returns true if the specified value is neither a question mark nor a
  control character other than horizontal tab; else false.
  @param ch is an unsigned character containing the 8-bit input value.
*/
static inline bool keep(uchar ch)
{
    if (ch == '?') {
        return false;
    }
    // HT is the only character below SPACE that is kept
    return ch >= ' ' || ch == '\t';
}
78 | |
79 | // |
80 | // QuotedPrintableCodec |
81 | // |
82 | |
83 | class QuotedPrintableEncoder : public Encoder |
84 | { |
85 | char mInputBuffer[16]; |
86 | uchar mCurrentLineLength; // 0..76 |
87 | uchar mAccu; |
88 | uint mInputBufferReadCursor : 4; // 0..15 |
89 | uint mInputBufferWriteCursor : 4; // 0..15 |
90 | enum { |
91 | Never, |
92 | AtBOL, |
93 | Definitely, |
94 | } mAccuNeedsEncoding : 2; |
95 | bool mSawLineEnd : 1; |
96 | bool mSawCR : 1; |
97 | bool mFinishing : 1; |
98 | bool mFinished : 1; |
99 | |
100 | protected: |
101 | friend class QuotedPrintableCodec; |
102 | QuotedPrintableEncoder(Codec::NewlineType newline = Codec::NewlineLF) |
103 | : Encoder(newline) |
104 | , mCurrentLineLength(0) |
105 | , mAccu(0) |
106 | , mInputBufferReadCursor(0) |
107 | , mInputBufferWriteCursor(0) |
108 | , mAccuNeedsEncoding(Never) |
109 | , mSawLineEnd(false) |
110 | , mSawCR(false) |
111 | , mFinishing(false) |
112 | , mFinished(false) |
113 | { |
114 | } |
115 | |
116 | bool needsEncoding(uchar ch) |
117 | { |
118 | return ch > '~' || (ch < ' ' && ch != '\t') || ch == '='; |
119 | } |
120 | bool needsEncodingAtEOL(uchar ch) |
121 | { |
122 | return ch == ' ' || ch == '\t'; |
123 | } |
124 | bool needsEncodingAtBOL(uchar ch) |
125 | { |
126 | return ch == 'F' || ch == '.' || ch == '-'; |
127 | } |
128 | bool fillInputBuffer(const char *&scursor, const char *const send); |
129 | bool processNextChar(); |
130 | void createOutputBuffer(char *&dcursor, const char *const dend); |
131 | |
132 | public: |
133 | ~QuotedPrintableEncoder() override |
134 | { |
135 | } |
136 | |
137 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
138 | |
139 | bool finish(char *&dcursor, const char *const dend) override; |
140 | }; |
141 | |
142 | class QuotedPrintableDecoder : public Decoder |
143 | { |
144 | const char mEscapeChar; |
145 | char mBadChar; |
146 | /** @p accu holds the msb nibble of the hexchar or zero. */ |
147 | uchar mAccu; |
148 | /** @p insideHexChar is true iff we're inside an hexchar (=XY). |
149 | Together with @ref mAccu, we can build this states: |
150 | @li @p insideHexChar == @p false: |
151 | normal text |
152 | @li @p insideHexChar == @p true, @p mAccu == 0: |
153 | saw the leading '=' |
154 | @li @p insideHexChar == @p true, @p mAccu != 0: |
155 | saw the first nibble '=X' |
156 | */ |
157 | const bool mQEncoding; |
158 | bool mInsideHexChar; |
159 | bool mFlushing; |
160 | bool mExpectLF; |
161 | bool mHaveAccu; |
162 | /** @p mLastChar holds the first char of an encoded char, so that |
163 | we are able to keep the first char if the second char is invalid. */ |
164 | char mLastChar; |
165 | |
166 | protected: |
167 | friend class QuotedPrintableCodec; |
168 | friend class Rfc2047QEncodingCodec; |
169 | friend class Rfc2231EncodingCodec; |
170 | QuotedPrintableDecoder(Codec::NewlineType newline = Codec::NewlineLF, bool aQEncoding = false, char aEscapeChar = '=') |
171 | : Decoder(newline) |
172 | , mEscapeChar(aEscapeChar) |
173 | , mBadChar(0) |
174 | , mAccu(0) |
175 | , mQEncoding(aQEncoding) |
176 | , mInsideHexChar(false) |
177 | , mFlushing(false) |
178 | , mExpectLF(false) |
179 | , mHaveAccu(false) |
180 | , mLastChar(0) |
181 | { |
182 | } |
183 | |
184 | public: |
185 | ~QuotedPrintableDecoder() override |
186 | { |
187 | } |
188 | |
189 | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
190 | bool finish(char *&dcursor, const char *const dend) override; |
191 | }; |
192 | |
193 | class Rfc2047QEncodingEncoder : public Encoder |
194 | { |
195 | uchar mAccu; |
196 | uchar mStepNo; |
197 | const char mEscapeChar; |
198 | bool mInsideFinishing : 1; |
199 | |
200 | protected: |
201 | friend class Rfc2047QEncodingCodec; |
202 | friend class Rfc2231EncodingCodec; |
203 | Rfc2047QEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF, char aEscapeChar = '=') |
204 | : Encoder(newline) |
205 | , mAccu(0) |
206 | , mStepNo(0) |
207 | , mEscapeChar(aEscapeChar) |
208 | , mInsideFinishing(false) |
209 | { |
210 | // else an optimization in ::encode might break. |
211 | assert(aEscapeChar == '=' || aEscapeChar == '%'); |
212 | } |
213 | |
214 | bool isEText(uchar ch) |
215 | { |
216 | return (ch < 128) && (eTextMap[ch / 8] & 0x80 >> ch % 8); |
217 | } |
218 | |
219 | // this code assumes that isEText( mEscapeChar ) == false! |
220 | bool needsEncoding(uchar ch) |
221 | { |
222 | if (ch > 'z') { |
223 | return true; // {|}~ DEL and 8bit chars need |
224 | } |
225 | if (!isEText(ch)) { |
226 | return true; // all but a-zA-Z0-9!/*+- need, too |
227 | } |
228 | if (mEscapeChar == '%' && (ch == '*' || ch == '/')) { |
229 | return true; // not allowed in rfc2231 encoding |
230 | } |
231 | return false; |
232 | } |
233 | |
234 | public: |
235 | ~Rfc2047QEncodingEncoder() override |
236 | { |
237 | } |
238 | |
239 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
240 | bool finish(char *&dcursor, const char *const dend) override; |
241 | }; |
242 | |
243 | // this doesn't access any member variables, so it can be defined static |
244 | // but then we can't call it from virtual functions |
245 | static qsizetype QuotedPrintableDecoder_maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) |
246 | { |
247 | // all chars unencoded: |
248 | qsizetype result = insize; |
249 | // but maybe all of them are \n and we need to make them \r\n :-o |
250 | if (newline == Codec::NewlineCRLF) { |
251 | result += insize; |
252 | } |
253 | |
254 | // there might be an accu plus escape |
255 | result += 2; |
256 | |
257 | return result; |
258 | } |
259 | |
260 | Encoder *QuotedPrintableCodec::makeEncoder(Codec::NewlineType newline) const |
261 | { |
262 | return new QuotedPrintableEncoder(newline); |
263 | } |
264 | |
265 | Decoder *QuotedPrintableCodec::makeDecoder(Codec::NewlineType newline) const |
266 | { |
267 | return new QuotedPrintableDecoder(newline); |
268 | } |
269 | |
270 | qsizetype QuotedPrintableCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
271 | { |
272 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
273 | } |
274 | |
275 | Encoder *Rfc2047QEncodingCodec::makeEncoder(Codec::NewlineType newline) const |
276 | { |
277 | return new Rfc2047QEncodingEncoder(newline); |
278 | } |
279 | |
280 | Decoder *Rfc2047QEncodingCodec::makeDecoder(Codec::NewlineType newline) const |
281 | { |
282 | return new QuotedPrintableDecoder(newline, true); |
283 | } |
284 | |
285 | qsizetype Rfc2047QEncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
286 | { |
287 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
288 | } |
289 | |
290 | Encoder *Rfc2231EncodingCodec::makeEncoder(Codec::NewlineType newline) const |
291 | { |
292 | return new Rfc2047QEncodingEncoder(newline, '%'); |
293 | } |
294 | |
295 | Decoder *Rfc2231EncodingCodec::makeDecoder(Codec::NewlineType newline) const |
296 | { |
297 | return new QuotedPrintableDecoder(newline, true, '%'); |
298 | } |
299 | |
300 | qsizetype Rfc2231EncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
301 | { |
302 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
303 | } |
304 | |
305 | /********************************************************/ |
306 | /********************************************************/ |
307 | /********************************************************/ |
308 | |
309 | bool QuotedPrintableDecoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
310 | { |
311 | if (d->newline == Codec::NewlineCRLF) { |
312 | qWarning() << "CRLF output for decoders isn't yet supported!" ; |
313 | } |
314 | |
315 | while (scursor != send && dcursor != dend) { |
316 | if (mFlushing) { |
317 | // we have to flush chars in the aftermath of a decoding |
318 | // error. The way to request a flush is to |
319 | // - store the offending character in mBadChar and |
320 | // - set mFlushing to true. |
321 | // The supported cases are (H: hexchar, X: bad char): |
322 | // =X, =HX, CR |
323 | // mBadChar is only written out if it is not by itself illegal in |
324 | // quoted-printable (e.g. CTLs, 8Bits). |
325 | // A fast way to suppress mBadChar output is to set it to NUL. |
326 | if (mInsideHexChar) { |
327 | // output '=' |
328 | *dcursor++ = mEscapeChar; |
329 | mInsideHexChar = false; |
330 | } else if (mHaveAccu) { |
331 | // output the high nibble of the accumulator: |
332 | *dcursor++ = mLastChar; |
333 | mHaveAccu = false; |
334 | mAccu = 0; |
335 | } else { |
336 | // output mBadChar |
337 | assert(mAccu == 0); |
338 | if (mBadChar) { |
339 | if (mBadChar == '=') { |
340 | mInsideHexChar = true; |
341 | } else { |
342 | *dcursor++ = mBadChar; |
343 | } |
344 | mBadChar = 0; |
345 | } |
346 | mFlushing = false; |
347 | } |
348 | continue; |
349 | } |
350 | assert(mBadChar == 0); |
351 | |
352 | uchar ch = *scursor++; |
353 | |
354 | if (mExpectLF && ch != '\n') { |
355 | // qWarning() << "QuotedPrintableDecoder:" |
356 | // "illegally formed soft linebreak or lonely CR!"; |
357 | mInsideHexChar = false; |
358 | mExpectLF = false; |
359 | if (mAccu != 0) { |
360 | return false; |
361 | } |
362 | } |
363 | |
364 | if (mInsideHexChar) { |
365 | uchar value = 255; |
366 | // next char(s) represent nibble instead of itself: |
367 | if (ch <= '9') { |
368 | if (ch >= '0') { |
369 | value = ch - '0'; |
370 | } else { |
371 | switch (ch) { |
372 | case '\r': |
373 | mExpectLF = true; |
374 | break; |
375 | case '\n': |
376 | // soft line break, but only if mAccu is NUL. |
377 | if (!mHaveAccu) { |
378 | mExpectLF = false; |
379 | mInsideHexChar = false; |
380 | break; |
381 | } |
382 | // else fall through |
383 | default: |
384 | // qWarning() << "QuotedPrintableDecoder:" |
385 | // "illegally formed hex char! Outputting verbatim."; |
386 | mBadChar = ch; |
387 | mFlushing = true; |
388 | } |
389 | continue; |
390 | } |
391 | } else { // ch > '9' |
392 | if (ch <= 'F') { |
393 | if (ch >= 'A') { |
394 | value = 10 + ch - 'A'; |
395 | } else { // [:-@] |
396 | mBadChar = ch; |
397 | mFlushing = true; |
398 | continue; |
399 | } |
400 | } else { // ch > 'F' |
401 | if (ch <= 'f' && ch >= 'a') { |
402 | value = 10 + ch - 'a'; |
403 | } else { |
404 | mBadChar = ch; |
405 | mFlushing = true; |
406 | continue; |
407 | } |
408 | } |
409 | } |
410 | |
411 | assert(value < 16); |
412 | assert(mBadChar == 0); |
413 | assert(!mExpectLF); |
414 | |
415 | if (mHaveAccu) { |
416 | *dcursor++ = char(mAccu | value); |
417 | mAccu = 0; |
418 | mHaveAccu = false; |
419 | mInsideHexChar = false; |
420 | } else { |
421 | mHaveAccu = true; |
422 | mAccu = value << 4; |
423 | mLastChar = ch; |
424 | } |
425 | } else { // not mInsideHexChar |
426 | if ((ch <= '~' && ch >= ' ') || ch == '\t') { |
427 | if (ch == mEscapeChar) { |
428 | mInsideHexChar = true; |
429 | } else if (mQEncoding && ch == '_') { |
430 | *dcursor++ = char(0x20); |
431 | } else { |
432 | *dcursor++ = char(ch); |
433 | } |
434 | } else if (ch == '\n') { |
435 | *dcursor++ = '\n'; |
436 | mExpectLF = false; |
437 | } else if (ch == '\r') { |
438 | mExpectLF = true; |
439 | } else { |
440 | // qWarning() << "QuotedPrintableDecoder:" << ch << |
441 | // "illegal character in input stream!"; |
442 | *dcursor++ = char(ch); |
443 | } |
444 | } |
445 | } |
446 | |
447 | return scursor == send; |
448 | } |
449 | |
450 | bool QuotedPrintableDecoder::finish(char *&dcursor, const char *const dend) |
451 | { |
452 | while ((mInsideHexChar || mHaveAccu || mFlushing) && dcursor != dend) { |
453 | // we have to flush chars |
454 | if (mInsideHexChar) { |
455 | // output '=' |
456 | *dcursor++ = mEscapeChar; |
457 | mInsideHexChar = false; |
458 | } else if (mHaveAccu) { |
459 | // output the high nibble of the accumulator: |
460 | *dcursor++ = mLastChar; |
461 | mHaveAccu = false; |
462 | mAccu = 0; |
463 | } else { |
464 | // output mBadChar |
465 | assert(mAccu == 0); |
466 | if (mBadChar) { |
467 | *dcursor++ = mBadChar; |
468 | mBadChar = 0; |
469 | } |
470 | mFlushing = false; |
471 | } |
472 | } |
473 | |
474 | // return false if we are not finished yet; note that mInsideHexChar is always false |
475 | return !(mHaveAccu || mFlushing); |
476 | } |
477 | |
478 | bool QuotedPrintableEncoder::fillInputBuffer(const char *&scursor, const char *const send) |
479 | { |
480 | // Don't read more if there's still a tail of a line in the buffer: |
481 | if (mSawLineEnd) { |
482 | return true; |
483 | } |
484 | |
485 | // Read until the buffer is full or we have found CRLF or LF (which |
486 | // don't end up in the input buffer): |
487 | for (; (mInputBufferWriteCursor + 1) % 16 != mInputBufferReadCursor && scursor != send; mInputBufferWriteCursor++) { |
488 | char ch = *scursor++; |
489 | if (ch == '\r') { |
490 | mSawCR = true; |
491 | } else if (ch == '\n') { |
492 | // remove the CR from the input buffer (if any) and return that |
493 | // we found a line ending: |
494 | if (mSawCR) { |
495 | mSawCR = false; |
496 | assert(mInputBufferWriteCursor != mInputBufferReadCursor); |
497 | mInputBufferWriteCursor--; |
498 | } |
499 | mSawLineEnd = true; |
500 | return true; // saw CRLF or LF |
501 | } else { |
502 | mSawCR = false; |
503 | } |
504 | mInputBuffer[mInputBufferWriteCursor] = ch; |
505 | } |
506 | mSawLineEnd = false; |
507 | return false; // didn't see a line ending... |
508 | } |
509 | |
510 | bool QuotedPrintableEncoder::processNextChar() |
511 | { |
512 | // If we process a buffer which doesn't end in a line break, we |
513 | // can't process all of it, since the next chars that will be read |
514 | // could be a line break. So we empty the buffer only until a fixed |
515 | // number of chars is left (except when mFinishing, which means that |
516 | // the data doesn't end in newline): |
517 | const int minBufferFillWithoutLineEnd = 4; |
518 | |
519 | assert(d->outputBufferCursor == 0); |
520 | |
521 | int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor); |
522 | if (bufferFill < 0) { |
523 | bufferFill += 16; |
524 | } |
525 | |
526 | assert(bufferFill >= 0 && bufferFill <= 15); |
527 | |
528 | if (!mFinishing // |
529 | && !mSawLineEnd // |
530 | && bufferFill < minBufferFillWithoutLineEnd) { |
531 | return false; |
532 | } |
533 | |
534 | // buffer is empty, return false: |
535 | if (mInputBufferReadCursor == mInputBufferWriteCursor) { |
536 | return false; |
537 | } |
538 | |
539 | // Real processing goes here: |
540 | mAccu = mInputBuffer[mInputBufferReadCursor++]; |
541 | if (needsEncoding(ch: mAccu)) { // always needs encoding or |
542 | mAccuNeedsEncoding = Definitely; |
543 | } else if ((mSawLineEnd || mFinishing) // needs encoding at end of line |
544 | && bufferFill == 1 // or end of buffer |
545 | && needsEncodingAtEOL(ch: mAccu)) { |
546 | mAccuNeedsEncoding = Definitely; |
547 | } else if (needsEncodingAtBOL(ch: mAccu)) { |
548 | mAccuNeedsEncoding = AtBOL; |
549 | } else { |
550 | // never needs encoding |
551 | mAccuNeedsEncoding = Never; |
552 | } |
553 | |
554 | return true; |
555 | } |
556 | |
557 | // Outputs processed (verbatim or hex-encoded) chars and inserts soft |
558 | // line breaks as necessary. Depends on processNextChar's directions |
559 | // on whether to encode the current char, and whether |
560 | // the current char is the last one in it's input line: |
561 | void QuotedPrintableEncoder::createOutputBuffer(char *&dcursor, const char *const dend) |
562 | { |
563 | const int maxLineLength = 76; // rfc 2045 |
564 | |
565 | assert(d->outputBufferCursor == 0); |
566 | |
567 | /* clang-format off */ |
568 | bool lastOneOnThisLine = mSawLineEnd |
569 | && mInputBufferReadCursor == mInputBufferWriteCursor; |
570 | /* clang-format on */ |
571 | |
572 | int neededSpace = 1; |
573 | if (mAccuNeedsEncoding == Definitely) { |
574 | neededSpace = 3; |
575 | } |
576 | |
577 | // reserve space for the soft hyphen (=) |
578 | if (!lastOneOnThisLine) { |
579 | neededSpace++; |
580 | } |
581 | |
582 | if (mCurrentLineLength > maxLineLength - neededSpace) { |
583 | // current line too short, insert soft line break: |
584 | write(ch: '=', dcursor, dend); |
585 | writeCRLF(dcursor, dend); |
586 | mCurrentLineLength = 0; |
587 | } |
588 | |
589 | if (Never == mAccuNeedsEncoding // |
590 | || (AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0)) { |
591 | write(ch: mAccu, dcursor, dend); |
592 | mCurrentLineLength++; |
593 | } else { |
594 | write(ch: '=', dcursor, dend); |
595 | write(ch: binToHex(value: highNibble(ch: mAccu)), dcursor, dend); |
596 | write(ch: binToHex(value: lowNibble(ch: mAccu)), dcursor, dend); |
597 | mCurrentLineLength += 3; |
598 | } |
599 | } |
600 | |
601 | bool QuotedPrintableEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
602 | { |
603 | // support probing by the caller: |
604 | if (mFinishing) { |
605 | return true; |
606 | } |
607 | |
608 | while (scursor != send && dcursor != dend) { |
609 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
610 | return scursor == send; |
611 | } |
612 | |
613 | assert(d->outputBufferCursor == 0); |
614 | |
615 | // fill input buffer until eol has been reached or until the |
616 | // buffer is full, whatever comes first: |
617 | fillInputBuffer(scursor, send); |
618 | |
619 | if (processNextChar()) { |
620 | // there was one... |
621 | createOutputBuffer(dcursor, dend); |
622 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
623 | // load a hard line break into output buffer: |
624 | writeCRLF(dcursor, dend); |
625 | // signal fillInputBuffer() we are ready for the next line: |
626 | mSawLineEnd = false; |
627 | mCurrentLineLength = 0; |
628 | } else { |
629 | // we are supposedly finished with this input block: |
630 | break; |
631 | } |
632 | } |
633 | |
634 | // make sure we write as much as possible and don't stop _writing_ |
635 | // just because we have no more _input_: |
636 | if (d->outputBufferCursor) { |
637 | flushOutputBuffer(dcursor, dend); |
638 | } |
639 | |
640 | return scursor == send; |
641 | |
642 | } // encode |
643 | |
644 | bool QuotedPrintableEncoder::finish(char *&dcursor, const char *const dend) |
645 | { |
646 | mFinishing = true; |
647 | |
648 | if (mFinished) { |
649 | return flushOutputBuffer(dcursor, dend); |
650 | } |
651 | |
652 | while (dcursor != dend) { |
653 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
654 | return false; |
655 | } |
656 | |
657 | assert(d->outputBufferCursor == 0); |
658 | |
659 | if (processNextChar()) { |
660 | // there was one... |
661 | createOutputBuffer(dcursor, dend); |
662 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
663 | // load a hard line break into output buffer: |
664 | writeCRLF(dcursor, dend); |
665 | mSawLineEnd = false; |
666 | mCurrentLineLength = 0; |
667 | } else { |
668 | mFinished = true; |
669 | return flushOutputBuffer(dcursor, dend); |
670 | } |
671 | } |
672 | |
673 | return mFinished && !d->outputBufferCursor; |
674 | |
675 | } // finish |
676 | |
677 | bool Rfc2047QEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
678 | { |
679 | if (mInsideFinishing) { |
680 | return true; |
681 | } |
682 | |
683 | while (scursor != send && dcursor != dend) { |
684 | uchar value = 0; |
685 | switch (mStepNo) { |
686 | case 0: |
687 | // read the next char and decide if and how do encode: |
688 | mAccu = *scursor++; |
689 | if (!needsEncoding(ch: mAccu)) { |
690 | *dcursor++ = char(mAccu); |
691 | } else if (mEscapeChar == '=' && mAccu == 0x20) { |
692 | // shortcut encoding for 0x20 (latin-1/us-ascii SPACE) |
693 | // (not for rfc2231 encoding) |
694 | *dcursor++ = '_'; |
695 | } else { |
696 | // needs =XY encoding - write escape char: |
697 | *dcursor++ = mEscapeChar; |
698 | mStepNo = 1; |
699 | } |
700 | continue; |
701 | case 1: |
702 | // extract hi-nibble: |
703 | value = highNibble(ch: mAccu); |
704 | mStepNo = 2; |
705 | break; |
706 | case 2: |
707 | // extract lo-nibble: |
708 | value = lowNibble(ch: mAccu); |
709 | mStepNo = 0; |
710 | break; |
711 | default: |
712 | assert(0); |
713 | } |
714 | |
715 | // and write: |
716 | *dcursor++ = binToHex(value); |
717 | } |
718 | |
719 | return scursor == send; |
720 | } // encode |
721 | |
722 | bool Rfc2047QEncodingEncoder::finish(char *&dcursor, const char *const dend) |
723 | { |
724 | mInsideFinishing = true; |
725 | |
726 | // write the last bits of mAccu, if any: |
727 | while (mStepNo != 0 && dcursor != dend) { |
728 | uchar value = 0; |
729 | switch (mStepNo) { |
730 | case 1: |
731 | // extract hi-nibble: |
732 | value = highNibble(ch: mAccu); |
733 | mStepNo = 2; |
734 | break; |
735 | case 2: |
736 | // extract lo-nibble: |
737 | value = lowNibble(ch: mAccu); |
738 | mStepNo = 0; |
739 | break; |
740 | default: |
741 | assert(0); |
742 | } |
743 | |
744 | // and write: |
745 | *dcursor++ = binToHex(value); |
746 | } |
747 | |
748 | return mStepNo == 0; |
749 | } |
750 | |
751 | } // namespace KCodecs |
752 | |