1 | /* -*- c++ -*- |
2 | SPDX-FileCopyrightText: 2001 Marc Mutz <mutz@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | /** |
7 | @file |
8 | This file is part of the API for handling @ref MIME data and |
9 | defines the @ref Base64 and @ref RFC2047B @ref Codec classes. |
10 | |
11 | @brief |
12 | Defines the Base64Codec and Rfc2047BEncodingCodec classes. |
13 | |
14 | @authors Marc Mutz \<mutz@kde.org\> |
15 | */ |
16 | |
17 | #include "kcodecsbase64.h" |
18 | #include "kcodecs_p.h" |
19 | |
20 | #include <QDebug> |
21 | |
22 | #include <cassert> |
23 | |
24 | using namespace KCodecs; |
25 | |
26 | namespace KCodecs |
27 | { |
28 | // codec for base64 as specified in RFC 2045 |
29 | // class Base64Codec; |
30 | // class Base64Decoder; |
31 | // class Base64Encoder; |
32 | |
33 | // codec for the B encoding as specified in RFC 2047 |
34 | // class Rfc2047BEncodingCodec; |
35 | // class Rfc2047BEncodingEncoder; |
36 | // class Rfc2047BEncodingDecoder; |
37 | |
38 | //@cond PRIVATE |
39 | static const uchar base64DecodeMap[128] = { |
40 | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
41 | |
42 | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64, |
43 | |
44 | 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, |
45 | |
46 | 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64}; |
47 | |
// Forward lookup for the base64 alphabet: maps a sextet value (0..63) to
// the character that represents it.
static const char base64EncodeMap[64] = {
    // 0..25: upper-case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26..51: lower-case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52..61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/',
};
51 | //@endcond |
52 | |
53 | class Base64Decoder : public Decoder |
54 | { |
55 | uint mStepNo; |
56 | uchar mOutbits; |
57 | bool mSawPadding : 1; |
58 | |
59 | protected: |
60 | friend class Base64Codec; |
61 | Base64Decoder(Codec::NewlineType newline = Codec::NewlineLF) |
62 | : Decoder(newline) |
63 | , mStepNo(0) |
64 | , mOutbits(0) |
65 | , mSawPadding(false) |
66 | { |
67 | } |
68 | |
69 | public: |
70 | ~Base64Decoder() override |
71 | { |
72 | } |
73 | |
74 | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
75 | // ### really needs no finishing??? |
76 | bool finish(char *&dcursor, const char *const dend) override |
77 | { |
78 | Q_UNUSED(dcursor); |
79 | Q_UNUSED(dend); |
80 | return true; |
81 | } |
82 | }; |
83 | |
84 | class Base64Encoder : public Encoder |
85 | { |
86 | uint mStepNo; |
87 | /** number of already written base64-quartets on current line */ |
88 | uint mWrittenPacketsOnThisLine; |
89 | uchar mNextbits; |
90 | bool mInsideFinishing : 1; |
91 | |
92 | protected: |
93 | friend class Rfc2047BEncodingCodec; |
94 | friend class Rfc2047BEncodingEncoder; |
95 | friend class Base64Codec; |
96 | Base64Encoder(Codec::NewlineType newline = Codec::NewlineLF) |
97 | : Encoder(newline) |
98 | , mStepNo(0) |
99 | , mWrittenPacketsOnThisLine(0) |
100 | , mNextbits(0) |
101 | , mInsideFinishing(false) |
102 | { |
103 | } |
104 | |
105 | bool generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd); |
106 | |
107 | public: |
108 | ~Base64Encoder() override |
109 | { |
110 | } |
111 | |
112 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
113 | |
114 | bool finish(char *&dcursor, const char *const dend) override; |
115 | |
116 | protected: |
117 | bool writeBase64(uchar ch, char *&dcursor, const char *const dend) |
118 | { |
119 | return write(ch: base64EncodeMap[ch], dcursor, dend); |
120 | } |
121 | }; |
122 | |
123 | class Rfc2047BEncodingEncoder : public Base64Encoder |
124 | { |
125 | protected: |
126 | friend class Rfc2047BEncodingCodec; |
127 | Rfc2047BEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF) |
128 | : Base64Encoder(newline) |
129 | { |
130 | } |
131 | |
132 | public: |
133 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
134 | bool finish(char *&dcursor, const char *const dend) override; |
135 | }; |
136 | |
137 | Encoder *Base64Codec::makeEncoder(Codec::NewlineType newline) const |
138 | { |
139 | return new Base64Encoder(newline); |
140 | } |
141 | |
142 | Decoder *Base64Codec::makeDecoder(Codec::NewlineType newline) const |
143 | { |
144 | return new Base64Decoder(newline); |
145 | } |
146 | |
147 | Encoder *Rfc2047BEncodingCodec::makeEncoder(Codec::NewlineType newline) const |
148 | { |
149 | return new Rfc2047BEncodingEncoder(newline); |
150 | } |
151 | |
152 | /********************************************************/ |
153 | /********************************************************/ |
154 | /********************************************************/ |
155 | |
156 | bool Base64Decoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
157 | { |
158 | while (dcursor != dend && scursor != send) { |
159 | uchar ch = *scursor++; |
160 | uchar value; |
161 | |
162 | // try converting ch to a 6-bit value: |
163 | if (ch < 128) { |
164 | value = base64DecodeMap[ch]; |
165 | } else { |
166 | value = 64; |
167 | } |
168 | |
169 | // ch isn't of the base64 alphabet, check for other significant chars: |
170 | if (value >= 64) { |
171 | if (ch == '=') { |
172 | // padding: |
173 | if (mStepNo == 0 || mStepNo == 1) { |
174 | if (!mSawPadding) { |
175 | // malformed |
176 | // qWarning() << "Base64Decoder: unexpected padding" |
177 | // "character in input stream"; |
178 | } |
179 | mSawPadding = true; |
180 | break; |
181 | } else if (mStepNo == 2) { |
182 | // ok, there should be another one |
183 | } else if (mStepNo == 3) { |
184 | // ok, end of encoded stream |
185 | mSawPadding = true; |
186 | break; |
187 | } |
188 | mSawPadding = true; |
189 | mStepNo = (mStepNo + 1) % 4; |
190 | continue; |
191 | } else { |
192 | // non-base64 alphabet |
193 | continue; |
194 | } |
195 | } |
196 | |
197 | if (mSawPadding) { |
198 | // qWarning() << "Base64Decoder: Embedded padding character" |
199 | // "encountered!"; |
200 | return true; |
201 | } |
202 | |
203 | // add the new bits to the output stream and flush full octets: |
204 | switch (mStepNo) { |
205 | case 0: |
206 | mOutbits = value << 2; |
207 | break; |
208 | case 1: |
209 | *dcursor++ = (char)(mOutbits | value >> 4); |
210 | mOutbits = value << 4; |
211 | break; |
212 | case 2: |
213 | *dcursor++ = (char)(mOutbits | value >> 2); |
214 | mOutbits = value << 6; |
215 | break; |
216 | case 3: |
217 | *dcursor++ = (char)(mOutbits | value); |
218 | mOutbits = 0; |
219 | break; |
220 | default: |
221 | assert(0); |
222 | } |
223 | mStepNo = (mStepNo + 1) % 4; |
224 | } |
225 | |
226 | // return false when caller should call us again: |
227 | return scursor == send; |
228 | } // Base64Decoder::decode() |
229 | |
230 | bool Base64Encoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
231 | { |
232 | const uint maxPacketsPerLine = 76 / 4; |
233 | |
234 | // detect when the caller doesn't adhere to our rules: |
235 | if (mInsideFinishing) { |
236 | return true; |
237 | } |
238 | |
239 | while (scursor != send && dcursor != dend) { |
240 | // properly empty the output buffer before starting something new: |
241 | // ### fixme: we can optimize this away, since the buffer isn't |
242 | // written to anyway (most of the time) |
243 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
244 | return scursor == send; |
245 | } |
246 | |
247 | uchar ch = *scursor++; |
248 | // mNextbits // (part of) value of next sextet |
249 | |
250 | // check for line length; |
251 | if (mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine) { |
252 | writeCRLF(dcursor, dend); |
253 | mWrittenPacketsOnThisLine = 0; |
254 | } |
255 | |
256 | // depending on mStepNo, extract value and mNextbits from the |
257 | // octet stream: |
258 | switch (mStepNo) { |
259 | case 0: |
260 | assert(mNextbits == 0); |
261 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
262 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
263 | break; |
264 | case 1: |
265 | assert((mNextbits & ~0x30) == 0); |
266 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
267 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
268 | break; |
269 | case 2: |
270 | assert((mNextbits & ~0x3C) == 0); |
271 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
272 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
273 | mNextbits = 0; |
274 | mWrittenPacketsOnThisLine++; |
275 | break; |
276 | default: |
277 | assert(0); |
278 | } |
279 | mStepNo = (mStepNo + 1) % 3; |
280 | } |
281 | |
282 | if (d->outputBufferCursor) { |
283 | flushOutputBuffer(dcursor, dend); |
284 | } |
285 | |
286 | return scursor == send; |
287 | } |
288 | |
289 | bool Rfc2047BEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
290 | { |
291 | // detect when the caller doesn't adhere to our rules: |
292 | if (mInsideFinishing) { |
293 | return true; |
294 | } |
295 | |
296 | while (scursor != send && dcursor != dend) { |
297 | // properly empty the output buffer before starting something new: |
298 | // ### fixme: we can optimize this away, since the buffer isn't |
299 | // written to anyway (most of the time) |
300 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
301 | return scursor == send; |
302 | } |
303 | |
304 | uchar ch = *scursor++; |
305 | // mNextbits // (part of) value of next sextet |
306 | |
307 | // depending on mStepNo, extract value and mNextbits from the |
308 | // octet stream: |
309 | switch (mStepNo) { |
310 | case 0: |
311 | assert(mNextbits == 0); |
312 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
313 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
314 | break; |
315 | case 1: |
316 | assert((mNextbits & ~0x30) == 0); |
317 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
318 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
319 | break; |
320 | case 2: |
321 | assert((mNextbits & ~0x3C) == 0); |
322 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
323 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
324 | mNextbits = 0; |
325 | break; |
326 | default: |
327 | assert(0); |
328 | } |
329 | mStepNo = (mStepNo + 1) % 3; |
330 | } |
331 | |
332 | if (d->outputBufferCursor) { |
333 | flushOutputBuffer(dcursor, dend); |
334 | } |
335 | |
336 | return scursor == send; |
337 | } |
338 | |
339 | bool Base64Encoder::finish(char *&dcursor, const char *const dend) |
340 | { |
341 | return generic_finish(dcursor, dend, withLFatEnd: true); |
342 | } |
343 | |
344 | bool Rfc2047BEncodingEncoder::finish(char *&dcursor, const char *const dend) |
345 | { |
346 | return generic_finish(dcursor, dend, withLFatEnd: false); |
347 | } |
348 | |
349 | bool Base64Encoder::generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd) |
350 | { |
351 | if (mInsideFinishing) { |
352 | return flushOutputBuffer(dcursor, dend); |
353 | } |
354 | |
355 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
356 | return false; |
357 | } |
358 | |
359 | mInsideFinishing = true; |
360 | |
361 | // |
362 | // writing out the last mNextbits... |
363 | // |
364 | switch (mStepNo) { |
365 | case 1: // 2 mNextbits waiting to be written. Needs two padding chars: |
366 | case 2: // 4 or 6 mNextbits waiting to be written. Completes a block |
367 | writeBase64(ch: mNextbits, dcursor, dend); |
368 | mNextbits = 0; |
369 | break; |
370 | case 0: // no padding, nothing to be written, except possibly the CRLF |
371 | assert(mNextbits == 0); |
372 | break; |
373 | default: |
374 | assert(0); |
375 | } |
376 | |
377 | // |
378 | // adding padding... |
379 | // |
380 | switch (mStepNo) { |
381 | case 1: |
382 | write(ch: '=', dcursor, dend); |
383 | Q_FALLTHROUGH(); |
384 | // fall through: |
385 | case 2: |
386 | write(ch: '=', dcursor, dend); |
387 | Q_FALLTHROUGH(); |
388 | // fall through: |
389 | case 0: // completed a quartet - add CRLF |
390 | if (withLFatEnd) { |
391 | writeCRLF(dcursor, dend); |
392 | } |
393 | return flushOutputBuffer(dcursor, dend); |
394 | default: |
395 | assert(0); |
396 | } |
397 | return true; // asserts get compiled out |
398 | } |
399 | |
400 | } // namespace KCodecs |
401 | |