1 | /* -*- c++ -*- |
2 | SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "kcodecsqp.h" |
8 | #include "kcodecs_p.h" |
9 | |
10 | #include <QDebug> |
11 | |
12 | #include <cassert> |
13 | |
14 | using namespace KCodecs; |
15 | |
16 | namespace KCodecs |
17 | { |
// Bitmap over the 128 US-ASCII code points, most significant bit first:
// character c is flagged when (eTextMap[c / 8] & (0x80 >> (c % 8))) is non-zero.
// Only a-z, A-Z, 0-9 and the five characters ! * + - / are flagged.
const uchar eTextMap[16] = {
    0x00, 0x00, 0x00, 0x00, // 0x00-0x1F: nothing flagged
    0x40, 0x35, 0xFF, 0xC0, // 0x20-0x3F: '!', then '*' '+' '-' '/', then '0'-'9'
    0x7F, 0xFF, 0xFF, 0xE0, // 0x40-0x5F: 'A'-'Z'
    0x7F, 0xFF, 0xFF, 0xE0, // 0x60-0x7F: 'a'-'z'
};
20 | |
21 | // some helpful functions: |
22 | |
/**
  Converts a 4-bit @p value into its upper-case hexadecimal character
  representation: 0..9 map to '0'..'9' and 10..15 map to 'A'..'F'.
  The input is not range-checked; values greater than 15 simply continue
  past 'F' and are therefore meaningless.
  @param value is an unsigned character containing the 4-bit input value.
  @return the hexadecimal digit representing @p value.
*/
static inline char binToHex(uchar value)
{
    // Offset that, added to the nibble, lands on the right run of digits.
    const char offset = (value > 9) ? char('A' - 10) : '0';
    return value + offset;
}
37 | |
/**
  Extracts the upper four bits of @p ch and returns them shifted down into
  the lower four bits of the result (so the result is in the range 0..15).
  @param ch is an unsigned character containing the 8-bit input value.
  @return the high-order nibble of @p ch.
*/
static inline uchar highNibble(uchar ch)
{
    return static_cast<uchar>(ch / 16);
}
46 | |
/**
  Extracts the lower four bits of @p ch; the upper four bits of the result
  are zero (so the result is in the range 0..15).
  @param ch is an unsigned character containing the 8-bit input value.
  @return the low-order nibble of @p ch.
*/
static inline uchar lowNibble(uchar ch)
{
    return static_cast<uchar>(ch % 16);
}
55 | |
/**
  Returns true if the specified value is neither a question mark nor a
  control character below the space character (horizontal tab excepted);
  else false. Values above '~' are not rejected by this check.
  @param ch is an unsigned character containing the 8-bit input value.
*/
static inline bool keep(uchar ch)
{
    if (ch == '?') {
        return false;
    }
    // HT is the only character below SPACE that is kept.
    return ch >= ' ' || ch == '\t';
}
66 | |
67 | // |
68 | // QuotedPrintableCodec |
69 | // |
70 | |
71 | class QuotedPrintableEncoder : public Encoder |
72 | { |
73 | char mInputBuffer[16]; |
74 | uchar mCurrentLineLength; // 0..76 |
75 | uchar mAccu; |
76 | uint mInputBufferReadCursor : 4; // 0..15 |
77 | uint mInputBufferWriteCursor : 4; // 0..15 |
78 | enum { |
79 | Never, |
80 | AtBOL, |
81 | Definitely, |
82 | } mAccuNeedsEncoding; |
83 | bool mSawLineEnd : 1; |
84 | bool mSawCR : 1; |
85 | bool mFinishing : 1; |
86 | bool mFinished : 1; |
87 | |
88 | protected: |
89 | friend class QuotedPrintableCodec; |
90 | QuotedPrintableEncoder(Codec::NewlineType newline = Codec::NewlineLF) |
91 | : Encoder(newline) |
92 | , mCurrentLineLength(0) |
93 | , mAccu(0) |
94 | , mInputBufferReadCursor(0) |
95 | , mInputBufferWriteCursor(0) |
96 | , mAccuNeedsEncoding(Never) |
97 | , mSawLineEnd(false) |
98 | , mSawCR(false) |
99 | , mFinishing(false) |
100 | , mFinished(false) |
101 | { |
102 | } |
103 | |
104 | bool needsEncoding(uchar ch) |
105 | { |
106 | return ch > '~' || (ch < ' ' && ch != '\t') || ch == '='; |
107 | } |
108 | bool needsEncodingAtEOL(uchar ch) |
109 | { |
110 | return ch == ' ' || ch == '\t'; |
111 | } |
112 | bool needsEncodingAtBOL(uchar ch) |
113 | { |
114 | return ch == 'F' || ch == '.' || ch == '-'; |
115 | } |
116 | bool fillInputBuffer(const char *&scursor, const char *const send); |
117 | bool processNextChar(); |
118 | void createOutputBuffer(char *&dcursor, const char *const dend); |
119 | |
120 | public: |
121 | ~QuotedPrintableEncoder() override |
122 | { |
123 | } |
124 | |
125 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
126 | |
127 | bool finish(char *&dcursor, const char *const dend) override; |
128 | }; |
129 | |
130 | class QuotedPrintableDecoder : public Decoder |
131 | { |
132 | const char mEscapeChar; |
133 | char mBadChar; |
134 | /** @p accu holds the msb nibble of the hexchar or zero. */ |
135 | uchar mAccu; |
136 | /** @p insideHexChar is true iff we're inside an hexchar (=XY). |
137 | Together with @ref mAccu, we can build this states: |
138 | @li @p insideHexChar == @p false: |
139 | normal text |
140 | @li @p insideHexChar == @p true, @p mAccu == 0: |
141 | saw the leading '=' |
142 | @li @p insideHexChar == @p true, @p mAccu != 0: |
143 | saw the first nibble '=X' |
144 | */ |
145 | const bool mQEncoding; |
146 | bool mInsideHexChar; |
147 | bool mFlushing; |
148 | bool mExpectLF; |
149 | bool mHaveAccu; |
150 | /** @p mLastChar holds the first char of an encoded char, so that |
151 | we are able to keep the first char if the second char is invalid. */ |
152 | char mLastChar; |
153 | |
154 | protected: |
155 | friend class QuotedPrintableCodec; |
156 | friend class Rfc2047QEncodingCodec; |
157 | friend class Rfc2231EncodingCodec; |
158 | QuotedPrintableDecoder(Codec::NewlineType newline = Codec::NewlineLF, bool aQEncoding = false, char aEscapeChar = '=') |
159 | : Decoder(newline) |
160 | , mEscapeChar(aEscapeChar) |
161 | , mBadChar(0) |
162 | , mAccu(0) |
163 | , mQEncoding(aQEncoding) |
164 | , mInsideHexChar(false) |
165 | , mFlushing(false) |
166 | , mExpectLF(false) |
167 | , mHaveAccu(false) |
168 | , mLastChar(0) |
169 | { |
170 | } |
171 | |
172 | public: |
173 | ~QuotedPrintableDecoder() override |
174 | { |
175 | } |
176 | |
177 | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
178 | bool finish(char *&dcursor, const char *const dend) override; |
179 | }; |
180 | |
181 | class Rfc2047QEncodingEncoder : public Encoder |
182 | { |
183 | uchar mAccu; |
184 | uchar mStepNo; |
185 | const char mEscapeChar; |
186 | bool mInsideFinishing : 1; |
187 | |
188 | protected: |
189 | friend class Rfc2047QEncodingCodec; |
190 | friend class Rfc2231EncodingCodec; |
191 | Rfc2047QEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF, char aEscapeChar = '=') |
192 | : Encoder(newline) |
193 | , mAccu(0) |
194 | , mStepNo(0) |
195 | , mEscapeChar(aEscapeChar) |
196 | , mInsideFinishing(false) |
197 | { |
198 | // else an optimization in ::encode might break. |
199 | assert(aEscapeChar == '=' || aEscapeChar == '%'); |
200 | } |
201 | |
202 | bool isEText(uchar ch) |
203 | { |
204 | return (ch < 128) && (eTextMap[ch / 8] & 0x80 >> ch % 8); |
205 | } |
206 | |
207 | // this code assumes that isEText( mEscapeChar ) == false! |
208 | bool needsEncoding(uchar ch) |
209 | { |
210 | if (ch > 'z') { |
211 | return true; // {|}~ DEL and 8bit chars need |
212 | } |
213 | if (!isEText(ch)) { |
214 | return true; // all but a-zA-Z0-9!/*+- need, too |
215 | } |
216 | if (mEscapeChar == '%' && (ch == '*' || ch == '/')) { |
217 | return true; // not allowed in rfc2231 encoding |
218 | } |
219 | return false; |
220 | } |
221 | |
222 | public: |
223 | ~Rfc2047QEncodingEncoder() override |
224 | { |
225 | } |
226 | |
227 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
228 | bool finish(char *&dcursor, const char *const dend) override; |
229 | }; |
230 | |
231 | // this doesn't access any member variables, so it can be defined static |
232 | // but then we can't call it from virtual functions |
233 | static qsizetype QuotedPrintableDecoder_maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) |
234 | { |
235 | // all chars unencoded: |
236 | qsizetype result = insize; |
237 | // but maybe all of them are \n and we need to make them \r\n :-o |
238 | if (newline == Codec::NewlineCRLF) { |
239 | result += insize; |
240 | } |
241 | |
242 | // there might be an accu plus escape |
243 | result += 2; |
244 | |
245 | return result; |
246 | } |
247 | |
248 | Encoder *QuotedPrintableCodec::makeEncoder(Codec::NewlineType newline) const |
249 | { |
250 | return new QuotedPrintableEncoder(newline); |
251 | } |
252 | |
253 | Decoder *QuotedPrintableCodec::makeDecoder(Codec::NewlineType newline) const |
254 | { |
255 | return new QuotedPrintableDecoder(newline); |
256 | } |
257 | |
258 | qsizetype QuotedPrintableCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
259 | { |
260 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
261 | } |
262 | |
263 | Encoder *Rfc2047QEncodingCodec::makeEncoder(Codec::NewlineType newline) const |
264 | { |
265 | return new Rfc2047QEncodingEncoder(newline); |
266 | } |
267 | |
268 | Decoder *Rfc2047QEncodingCodec::makeDecoder(Codec::NewlineType newline) const |
269 | { |
270 | return new QuotedPrintableDecoder(newline, true); |
271 | } |
272 | |
273 | qsizetype Rfc2047QEncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
274 | { |
275 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
276 | } |
277 | |
278 | Encoder *Rfc2231EncodingCodec::makeEncoder(Codec::NewlineType newline) const |
279 | { |
280 | return new Rfc2047QEncodingEncoder(newline, '%'); |
281 | } |
282 | |
283 | Decoder *Rfc2231EncodingCodec::makeDecoder(Codec::NewlineType newline) const |
284 | { |
285 | return new QuotedPrintableDecoder(newline, true, '%'); |
286 | } |
287 | |
288 | qsizetype Rfc2231EncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const |
289 | { |
290 | return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline); |
291 | } |
292 | |
293 | /********************************************************/ |
294 | /********************************************************/ |
295 | /********************************************************/ |
296 | |
297 | bool QuotedPrintableDecoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
298 | { |
299 | if (d->newline == Codec::NewlineCRLF) { |
300 | qWarning() << "CRLF output for decoders isn't yet supported!" ; |
301 | } |
302 | |
303 | while (scursor != send && dcursor != dend) { |
304 | if (mFlushing) { |
305 | // we have to flush chars in the aftermath of a decoding |
306 | // error. The way to request a flush is to |
307 | // - store the offending character in mBadChar and |
308 | // - set mFlushing to true. |
309 | // The supported cases are (H: hexchar, X: bad char): |
310 | // =X, =HX, CR |
311 | // mBadChar is only written out if it is not by itself illegal in |
312 | // quoted-printable (e.g. CTLs, 8Bits). |
313 | // A fast way to suppress mBadChar output is to set it to NUL. |
314 | if (mInsideHexChar) { |
315 | // output '=' |
316 | *dcursor++ = mEscapeChar; |
317 | mInsideHexChar = false; |
318 | } else if (mHaveAccu) { |
319 | // output the high nibble of the accumulator: |
320 | *dcursor++ = mLastChar; |
321 | mHaveAccu = false; |
322 | mAccu = 0; |
323 | } else { |
324 | // output mBadChar |
325 | assert(mAccu == 0); |
326 | if (mBadChar) { |
327 | if (mBadChar == '=') { |
328 | mInsideHexChar = true; |
329 | } else { |
330 | *dcursor++ = mBadChar; |
331 | } |
332 | mBadChar = 0; |
333 | } |
334 | mFlushing = false; |
335 | } |
336 | continue; |
337 | } |
338 | assert(mBadChar == 0); |
339 | |
340 | uchar ch = *scursor++; |
341 | |
342 | if (mExpectLF && ch != '\n') { |
343 | // qWarning() << "QuotedPrintableDecoder:" |
344 | // "illegally formed soft linebreak or lonely CR!"; |
345 | mInsideHexChar = false; |
346 | mExpectLF = false; |
347 | if (mAccu != 0) { |
348 | return false; |
349 | } |
350 | } |
351 | |
352 | if (mInsideHexChar) { |
353 | uchar value = 255; |
354 | // next char(s) represent nibble instead of itself: |
355 | if (ch <= '9') { |
356 | if (ch >= '0') { |
357 | value = ch - '0'; |
358 | } else { |
359 | switch (ch) { |
360 | case '\r': |
361 | mExpectLF = true; |
362 | break; |
363 | case '\n': |
364 | // soft line break, but only if mAccu is NUL. |
365 | if (!mHaveAccu) { |
366 | mExpectLF = false; |
367 | mInsideHexChar = false; |
368 | break; |
369 | } |
370 | // else fall through |
371 | default: |
372 | // qWarning() << "QuotedPrintableDecoder:" |
373 | // "illegally formed hex char! Outputting verbatim."; |
374 | mBadChar = ch; |
375 | mFlushing = true; |
376 | } |
377 | continue; |
378 | } |
379 | } else { // ch > '9' |
380 | if (ch <= 'F') { |
381 | if (ch >= 'A') { |
382 | value = 10 + ch - 'A'; |
383 | } else { // [:-@] |
384 | mBadChar = ch; |
385 | mFlushing = true; |
386 | continue; |
387 | } |
388 | } else { // ch > 'F' |
389 | if (ch <= 'f' && ch >= 'a') { |
390 | value = 10 + ch - 'a'; |
391 | } else { |
392 | mBadChar = ch; |
393 | mFlushing = true; |
394 | continue; |
395 | } |
396 | } |
397 | } |
398 | |
399 | assert(value < 16); |
400 | assert(mBadChar == 0); |
401 | assert(!mExpectLF); |
402 | |
403 | if (mHaveAccu) { |
404 | *dcursor++ = char(mAccu | value); |
405 | mAccu = 0; |
406 | mHaveAccu = false; |
407 | mInsideHexChar = false; |
408 | } else { |
409 | mHaveAccu = true; |
410 | mAccu = value << 4; |
411 | mLastChar = ch; |
412 | } |
413 | } else { // not mInsideHexChar |
414 | if ((ch <= '~' && ch >= ' ') || ch == '\t') { |
415 | if (ch == mEscapeChar) { |
416 | mInsideHexChar = true; |
417 | } else if (mQEncoding && ch == '_') { |
418 | *dcursor++ = char(0x20); |
419 | } else { |
420 | *dcursor++ = char(ch); |
421 | } |
422 | } else if (ch == '\n') { |
423 | *dcursor++ = '\n'; |
424 | mExpectLF = false; |
425 | } else if (ch == '\r') { |
426 | mExpectLF = true; |
427 | } else { |
428 | // qWarning() << "QuotedPrintableDecoder:" << ch << |
429 | // "illegal character in input stream!"; |
430 | *dcursor++ = char(ch); |
431 | } |
432 | } |
433 | } |
434 | |
435 | return scursor == send; |
436 | } |
437 | |
438 | bool QuotedPrintableDecoder::finish(char *&dcursor, const char *const dend) |
439 | { |
440 | while ((mInsideHexChar || mHaveAccu || mFlushing) && dcursor != dend) { |
441 | // we have to flush chars |
442 | if (mInsideHexChar) { |
443 | // output '=' |
444 | *dcursor++ = mEscapeChar; |
445 | mInsideHexChar = false; |
446 | } else if (mHaveAccu) { |
447 | // output the high nibble of the accumulator: |
448 | *dcursor++ = mLastChar; |
449 | mHaveAccu = false; |
450 | mAccu = 0; |
451 | } else { |
452 | // output mBadChar |
453 | assert(mAccu == 0); |
454 | if (mBadChar) { |
455 | *dcursor++ = mBadChar; |
456 | mBadChar = 0; |
457 | } |
458 | mFlushing = false; |
459 | } |
460 | } |
461 | |
462 | // return false if we are not finished yet; note that mInsideHexChar is always false |
463 | return !(mHaveAccu || mFlushing); |
464 | } |
465 | |
466 | bool QuotedPrintableEncoder::fillInputBuffer(const char *&scursor, const char *const send) |
467 | { |
468 | // Don't read more if there's still a tail of a line in the buffer: |
469 | if (mSawLineEnd) { |
470 | return true; |
471 | } |
472 | |
473 | // Read until the buffer is full or we have found CRLF or LF (which |
474 | // don't end up in the input buffer): |
475 | for (; (mInputBufferWriteCursor + 1) % 16 != mInputBufferReadCursor && scursor != send; mInputBufferWriteCursor++) { |
476 | char ch = *scursor++; |
477 | if (ch == '\r') { |
478 | mSawCR = true; |
479 | } else if (ch == '\n') { |
480 | // remove the CR from the input buffer (if any) and return that |
481 | // we found a line ending: |
482 | if (mSawCR) { |
483 | mSawCR = false; |
484 | assert(mInputBufferWriteCursor != mInputBufferReadCursor); |
485 | mInputBufferWriteCursor--; |
486 | } |
487 | mSawLineEnd = true; |
488 | return true; // saw CRLF or LF |
489 | } else { |
490 | mSawCR = false; |
491 | } |
492 | mInputBuffer[mInputBufferWriteCursor] = ch; |
493 | } |
494 | mSawLineEnd = false; |
495 | return false; // didn't see a line ending... |
496 | } |
497 | |
498 | bool QuotedPrintableEncoder::processNextChar() |
499 | { |
500 | // If we process a buffer which doesn't end in a line break, we |
501 | // can't process all of it, since the next chars that will be read |
502 | // could be a line break. So we empty the buffer only until a fixed |
503 | // number of chars is left (except when mFinishing, which means that |
504 | // the data doesn't end in newline): |
505 | const int minBufferFillWithoutLineEnd = 4; |
506 | |
507 | assert(d->outputBufferCursor == 0); |
508 | |
509 | int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor); |
510 | if (bufferFill < 0) { |
511 | bufferFill += 16; |
512 | } |
513 | |
514 | assert(bufferFill >= 0 && bufferFill <= 15); |
515 | |
516 | if (!mFinishing // |
517 | && !mSawLineEnd // |
518 | && bufferFill < minBufferFillWithoutLineEnd) { |
519 | return false; |
520 | } |
521 | |
522 | // buffer is empty, return false: |
523 | if (mInputBufferReadCursor == mInputBufferWriteCursor) { |
524 | return false; |
525 | } |
526 | |
527 | // Real processing goes here: |
528 | mAccu = mInputBuffer[mInputBufferReadCursor++]; |
529 | if (needsEncoding(ch: mAccu)) { // always needs encoding or |
530 | mAccuNeedsEncoding = Definitely; |
531 | } else if ((mSawLineEnd || mFinishing) // needs encoding at end of line |
532 | && bufferFill == 1 // or end of buffer |
533 | && needsEncodingAtEOL(ch: mAccu)) { |
534 | mAccuNeedsEncoding = Definitely; |
535 | } else if (needsEncodingAtBOL(ch: mAccu)) { |
536 | mAccuNeedsEncoding = AtBOL; |
537 | } else { |
538 | // never needs encoding |
539 | mAccuNeedsEncoding = Never; |
540 | } |
541 | |
542 | return true; |
543 | } |
544 | |
545 | // Outputs processed (verbatim or hex-encoded) chars and inserts soft |
546 | // line breaks as necessary. Depends on processNextChar's directions |
547 | // on whether to encode the current char, and whether |
548 | // the current char is the last one in it's input line: |
549 | void QuotedPrintableEncoder::createOutputBuffer(char *&dcursor, const char *const dend) |
550 | { |
551 | const int maxLineLength = 76; // rfc 2045 |
552 | |
553 | assert(d->outputBufferCursor == 0); |
554 | |
555 | /* clang-format off */ |
556 | bool lastOneOnThisLine = mSawLineEnd |
557 | && mInputBufferReadCursor == mInputBufferWriteCursor; |
558 | /* clang-format on */ |
559 | |
560 | int neededSpace = 1; |
561 | if (mAccuNeedsEncoding == Definitely) { |
562 | neededSpace = 3; |
563 | } |
564 | |
565 | // reserve space for the soft hyphen (=) |
566 | if (!lastOneOnThisLine) { |
567 | neededSpace++; |
568 | } |
569 | |
570 | if (mCurrentLineLength > maxLineLength - neededSpace) { |
571 | // current line too short, insert soft line break: |
572 | write(ch: '=', dcursor, dend); |
573 | writeCRLF(dcursor, dend); |
574 | mCurrentLineLength = 0; |
575 | } |
576 | |
577 | if (Never == mAccuNeedsEncoding // |
578 | || (AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0)) { |
579 | write(ch: mAccu, dcursor, dend); |
580 | mCurrentLineLength++; |
581 | } else { |
582 | write(ch: '=', dcursor, dend); |
583 | write(ch: binToHex(value: highNibble(ch: mAccu)), dcursor, dend); |
584 | write(ch: binToHex(value: lowNibble(ch: mAccu)), dcursor, dend); |
585 | mCurrentLineLength += 3; |
586 | } |
587 | } |
588 | |
589 | bool QuotedPrintableEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
590 | { |
591 | // support probing by the caller: |
592 | if (mFinishing) { |
593 | return true; |
594 | } |
595 | |
596 | while (scursor != send && dcursor != dend) { |
597 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
598 | return scursor == send; |
599 | } |
600 | |
601 | assert(d->outputBufferCursor == 0); |
602 | |
603 | // fill input buffer until eol has been reached or until the |
604 | // buffer is full, whatever comes first: |
605 | fillInputBuffer(scursor, send); |
606 | |
607 | if (processNextChar()) { |
608 | // there was one... |
609 | createOutputBuffer(dcursor, dend); |
610 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
611 | // load a hard line break into output buffer: |
612 | writeCRLF(dcursor, dend); |
613 | // signal fillInputBuffer() we are ready for the next line: |
614 | mSawLineEnd = false; |
615 | mCurrentLineLength = 0; |
616 | } else { |
617 | // we are supposedly finished with this input block: |
618 | break; |
619 | } |
620 | } |
621 | |
622 | // make sure we write as much as possible and don't stop _writing_ |
623 | // just because we have no more _input_: |
624 | if (d->outputBufferCursor) { |
625 | flushOutputBuffer(dcursor, dend); |
626 | } |
627 | |
628 | return scursor == send; |
629 | |
630 | } // encode |
631 | |
632 | bool QuotedPrintableEncoder::finish(char *&dcursor, const char *const dend) |
633 | { |
634 | mFinishing = true; |
635 | |
636 | if (mFinished) { |
637 | return flushOutputBuffer(dcursor, dend); |
638 | } |
639 | |
640 | while (dcursor != dend) { |
641 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
642 | return false; |
643 | } |
644 | |
645 | assert(d->outputBufferCursor == 0); |
646 | |
647 | if (processNextChar()) { |
648 | // there was one... |
649 | createOutputBuffer(dcursor, dend); |
650 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
651 | // load a hard line break into output buffer: |
652 | writeCRLF(dcursor, dend); |
653 | mSawLineEnd = false; |
654 | mCurrentLineLength = 0; |
655 | } else { |
656 | mFinished = true; |
657 | return flushOutputBuffer(dcursor, dend); |
658 | } |
659 | } |
660 | |
661 | return mFinished && !d->outputBufferCursor; |
662 | |
663 | } // finish |
664 | |
665 | bool Rfc2047QEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
666 | { |
667 | if (mInsideFinishing) { |
668 | return true; |
669 | } |
670 | |
671 | while (scursor != send && dcursor != dend) { |
672 | uchar value = 0; |
673 | switch (mStepNo) { |
674 | case 0: |
675 | // read the next char and decide if and how do encode: |
676 | mAccu = *scursor++; |
677 | if (!needsEncoding(ch: mAccu)) { |
678 | *dcursor++ = char(mAccu); |
679 | } else if (mEscapeChar == '=' && mAccu == 0x20) { |
680 | // shortcut encoding for 0x20 (latin-1/us-ascii SPACE) |
681 | // (not for rfc2231 encoding) |
682 | *dcursor++ = '_'; |
683 | } else { |
684 | // needs =XY encoding - write escape char: |
685 | *dcursor++ = mEscapeChar; |
686 | mStepNo = 1; |
687 | } |
688 | continue; |
689 | case 1: |
690 | // extract hi-nibble: |
691 | value = highNibble(ch: mAccu); |
692 | mStepNo = 2; |
693 | break; |
694 | case 2: |
695 | // extract lo-nibble: |
696 | value = lowNibble(ch: mAccu); |
697 | mStepNo = 0; |
698 | break; |
699 | default: |
700 | assert(0); |
701 | } |
702 | |
703 | // and write: |
704 | *dcursor++ = binToHex(value); |
705 | } |
706 | |
707 | return scursor == send; |
708 | } // encode |
709 | |
710 | bool Rfc2047QEncodingEncoder::finish(char *&dcursor, const char *const dend) |
711 | { |
712 | mInsideFinishing = true; |
713 | |
714 | // write the last bits of mAccu, if any: |
715 | while (mStepNo != 0 && dcursor != dend) { |
716 | uchar value = 0; |
717 | switch (mStepNo) { |
718 | case 1: |
719 | // extract hi-nibble: |
720 | value = highNibble(ch: mAccu); |
721 | mStepNo = 2; |
722 | break; |
723 | case 2: |
724 | // extract lo-nibble: |
725 | value = lowNibble(ch: mAccu); |
726 | mStepNo = 0; |
727 | break; |
728 | default: |
729 | assert(0); |
730 | } |
731 | |
732 | // and write: |
733 | *dcursor++ = binToHex(value); |
734 | } |
735 | |
736 | return mStepNo == 0; |
737 | } |
738 | |
739 | } // namespace KCodecs |
740 | |