1 | /* -*- c++ -*- |
2 | SPDX-FileCopyrightText: 2001 Marc Mutz <mutz@kde.org> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | |
7 | #include "kcodecsbase64.h" |
8 | #include "kcodecs_p.h" |
9 | |
10 | #include <QDebug> |
11 | |
12 | #include <cassert> |
13 | |
14 | using namespace KCodecs; |
15 | |
16 | namespace KCodecs |
17 | { |
18 | // codec for base64 as specified in RFC 2045 |
19 | // class Base64Codec; |
20 | // class Base64Decoder; |
21 | // class Base64Encoder; |
22 | |
23 | // codec for the B encoding as specified in RFC 2047 |
24 | // class Rfc2047BEncodingCodec; |
25 | // class Rfc2047BEncodingEncoder; |
26 | // class Rfc2047BEncodingDecoder; |
27 | |
28 | //@cond PRIVATE |
// Reverse lookup table used while decoding: indexed by a 7-bit character
// code, each entry holds the 6-bit value of that base64 character. The value
// 64 marks a character that is not part of the base64 alphabet (this includes
// the padding character '=').
static const uchar base64DecodeMap[128] = {
    // 0x00..0x1F: no base64 characters in this range
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,

    // 0x20..0x3F: '+' -> 62, '/' -> 63, '0'..'9' -> 52..61
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,

    // 0x40..0x5F: 'A'..'Z' -> 0..25
    64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,

    // 0x60..0x7F: 'a'..'z' -> 26..51
    64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64};
37 | |
// Forward lookup table used while encoding: maps a 6-bit value (0..63) to the
// character that represents it in the base64 alphabet.
static const char base64EncodeMap[64] = {
    // 0..25: upper-case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26..51: lower-case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52..61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63: the two non-alphanumeric symbols
    '+', '/'};
41 | //@endcond |
42 | |
43 | class Base64Decoder : public Decoder |
44 | { |
45 | uint mStepNo; |
46 | uchar mOutbits; |
47 | bool mSawPadding : 1; |
48 | |
49 | protected: |
50 | friend class Base64Codec; |
51 | Base64Decoder(Codec::NewlineType newline = Codec::NewlineLF) |
52 | : Decoder(newline) |
53 | , mStepNo(0) |
54 | , mOutbits(0) |
55 | , mSawPadding(false) |
56 | { |
57 | } |
58 | |
59 | public: |
60 | ~Base64Decoder() override |
61 | { |
62 | } |
63 | |
64 | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
65 | // ### really needs no finishing??? |
66 | bool finish(char *&dcursor, const char *const dend) override |
67 | { |
68 | Q_UNUSED(dcursor); |
69 | Q_UNUSED(dend); |
70 | return true; |
71 | } |
72 | }; |
73 | |
74 | class Base64Encoder : public Encoder |
75 | { |
76 | uint mStepNo; |
77 | /** number of already written base64-quartets on current line */ |
78 | uint mWrittenPacketsOnThisLine; |
79 | uchar mNextbits; |
80 | bool mInsideFinishing : 1; |
81 | |
82 | protected: |
83 | friend class Rfc2047BEncodingCodec; |
84 | friend class Rfc2047BEncodingEncoder; |
85 | friend class Base64Codec; |
86 | Base64Encoder(Codec::NewlineType newline = Codec::NewlineLF) |
87 | : Encoder(newline) |
88 | , mStepNo(0) |
89 | , mWrittenPacketsOnThisLine(0) |
90 | , mNextbits(0) |
91 | , mInsideFinishing(false) |
92 | { |
93 | } |
94 | |
95 | bool generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd); |
96 | |
97 | public: |
98 | ~Base64Encoder() override |
99 | { |
100 | } |
101 | |
102 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
103 | |
104 | bool finish(char *&dcursor, const char *const dend) override; |
105 | |
106 | protected: |
107 | bool writeBase64(uchar ch, char *&dcursor, const char *const dend) |
108 | { |
109 | return write(ch: base64EncodeMap[ch], dcursor, dend); |
110 | } |
111 | }; |
112 | |
/**
 * Encoder for the "B" encoding (RFC 2047, per the comment at the top of this
 * file). Reuses the state and helpers of Base64Encoder and overrides encode()
 * and finish(): its encode() performs no line-length check and its finish()
 * requests no line break after the padding.
 */
class Rfc2047BEncodingEncoder : public Base64Encoder
{
protected:
    // only the codec is meant to create instances
    friend class Rfc2047BEncodingCodec;
    Rfc2047BEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF)
        : Base64Encoder(newline)
    {
    }

public:
    bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
    bool finish(char *&dcursor, const char *const dend) override;
};
126 | |
/**
 * Creates a Base64Encoder that uses the given @p newline type.
 * The object is allocated with new; presumably the caller takes ownership
 * (NOTE(review): confirm against the Codec interface).
 */
Encoder *Base64Codec::makeEncoder(Codec::NewlineType newline) const
{
    return new Base64Encoder(newline);
}
131 | |
/**
 * Creates a Base64Decoder that uses the given @p newline type.
 * The object is allocated with new; presumably the caller takes ownership
 * (NOTE(review): confirm against the Codec interface).
 */
Decoder *Base64Codec::makeDecoder(Codec::NewlineType newline) const
{
    return new Base64Decoder(newline);
}
136 | |
/**
 * Creates an Rfc2047BEncodingEncoder that uses the given @p newline type.
 * The object is allocated with new; presumably the caller takes ownership
 * (NOTE(review): confirm against the Codec interface).
 */
Encoder *Rfc2047BEncodingCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new Rfc2047BEncodingEncoder(newline);
}
141 | |
142 | /********************************************************/ |
143 | /********************************************************/ |
144 | /********************************************************/ |
145 | |
146 | bool Base64Decoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
147 | { |
148 | while (dcursor != dend && scursor != send) { |
149 | uchar ch = *scursor++; |
150 | uchar value; |
151 | |
152 | // try converting ch to a 6-bit value: |
153 | if (ch < 128) { |
154 | value = base64DecodeMap[ch]; |
155 | } else { |
156 | value = 64; |
157 | } |
158 | |
159 | // ch isn't of the base64 alphabet, check for other significant chars: |
160 | if (value >= 64) { |
161 | if (ch == '=') { |
162 | // padding: |
163 | if (mStepNo == 0 || mStepNo == 1) { |
164 | if (!mSawPadding) { |
165 | // malformed |
166 | // qWarning() << "Base64Decoder: unexpected padding" |
167 | // "character in input stream"; |
168 | } |
169 | mSawPadding = true; |
170 | break; |
171 | } else if (mStepNo == 2) { |
172 | // ok, there should be another one |
173 | } else if (mStepNo == 3) { |
174 | // ok, end of encoded stream |
175 | mSawPadding = true; |
176 | break; |
177 | } |
178 | mSawPadding = true; |
179 | mStepNo = (mStepNo + 1) % 4; |
180 | continue; |
181 | } else { |
182 | // non-base64 alphabet |
183 | continue; |
184 | } |
185 | } |
186 | |
187 | if (mSawPadding) { |
188 | // qWarning() << "Base64Decoder: Embedded padding character" |
189 | // "encountered!"; |
190 | return true; |
191 | } |
192 | |
193 | // add the new bits to the output stream and flush full octets: |
194 | switch (mStepNo) { |
195 | case 0: |
196 | mOutbits = value << 2; |
197 | break; |
198 | case 1: |
199 | *dcursor++ = (char)(mOutbits | value >> 4); |
200 | mOutbits = value << 4; |
201 | break; |
202 | case 2: |
203 | *dcursor++ = (char)(mOutbits | value >> 2); |
204 | mOutbits = value << 6; |
205 | break; |
206 | case 3: |
207 | *dcursor++ = (char)(mOutbits | value); |
208 | mOutbits = 0; |
209 | break; |
210 | default: |
211 | assert(0); |
212 | } |
213 | mStepNo = (mStepNo + 1) % 4; |
214 | } |
215 | |
216 | // return false when caller should call us again: |
217 | return scursor == send; |
218 | } // Base64Decoder::decode() |
219 | |
220 | bool Base64Encoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
221 | { |
222 | const uint maxPacketsPerLine = 76 / 4; |
223 | |
224 | // detect when the caller doesn't adhere to our rules: |
225 | if (mInsideFinishing) { |
226 | return true; |
227 | } |
228 | |
229 | while (scursor != send && dcursor != dend) { |
230 | // properly empty the output buffer before starting something new: |
231 | // ### fixme: we can optimize this away, since the buffer isn't |
232 | // written to anyway (most of the time) |
233 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
234 | return scursor == send; |
235 | } |
236 | |
237 | uchar ch = *scursor++; |
238 | // mNextbits // (part of) value of next sextet |
239 | |
240 | // check for line length; |
241 | if (mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine) { |
242 | writeCRLF(dcursor, dend); |
243 | mWrittenPacketsOnThisLine = 0; |
244 | } |
245 | |
246 | // depending on mStepNo, extract value and mNextbits from the |
247 | // octet stream: |
248 | switch (mStepNo) { |
249 | case 0: |
250 | assert(mNextbits == 0); |
251 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
252 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
253 | break; |
254 | case 1: |
255 | assert((mNextbits & ~0x30) == 0); |
256 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
257 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
258 | break; |
259 | case 2: |
260 | assert((mNextbits & ~0x3C) == 0); |
261 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
262 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
263 | mNextbits = 0; |
264 | mWrittenPacketsOnThisLine++; |
265 | break; |
266 | default: |
267 | assert(0); |
268 | } |
269 | mStepNo = (mStepNo + 1) % 3; |
270 | } |
271 | |
272 | if (d->outputBufferCursor) { |
273 | flushOutputBuffer(dcursor, dend); |
274 | } |
275 | |
276 | return scursor == send; |
277 | } |
278 | |
279 | bool Rfc2047BEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
280 | { |
281 | // detect when the caller doesn't adhere to our rules: |
282 | if (mInsideFinishing) { |
283 | return true; |
284 | } |
285 | |
286 | while (scursor != send && dcursor != dend) { |
287 | // properly empty the output buffer before starting something new: |
288 | // ### fixme: we can optimize this away, since the buffer isn't |
289 | // written to anyway (most of the time) |
290 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
291 | return scursor == send; |
292 | } |
293 | |
294 | uchar ch = *scursor++; |
295 | // mNextbits // (part of) value of next sextet |
296 | |
297 | // depending on mStepNo, extract value and mNextbits from the |
298 | // octet stream: |
299 | switch (mStepNo) { |
300 | case 0: |
301 | assert(mNextbits == 0); |
302 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
303 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
304 | break; |
305 | case 1: |
306 | assert((mNextbits & ~0x30) == 0); |
307 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
308 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
309 | break; |
310 | case 2: |
311 | assert((mNextbits & ~0x3C) == 0); |
312 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
313 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
314 | mNextbits = 0; |
315 | break; |
316 | default: |
317 | assert(0); |
318 | } |
319 | mStepNo = (mStepNo + 1) % 3; |
320 | } |
321 | |
322 | if (d->outputBufferCursor) { |
323 | flushOutputBuffer(dcursor, dend); |
324 | } |
325 | |
326 | return scursor == send; |
327 | } |
328 | |
329 | bool Base64Encoder::finish(char *&dcursor, const char *const dend) |
330 | { |
331 | return generic_finish(dcursor, dend, withLFatEnd: true); |
332 | } |
333 | |
334 | bool Rfc2047BEncodingEncoder::finish(char *&dcursor, const char *const dend) |
335 | { |
336 | return generic_finish(dcursor, dend, withLFatEnd: false); |
337 | } |
338 | |
339 | bool Base64Encoder::generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd) |
340 | { |
341 | if (mInsideFinishing) { |
342 | return flushOutputBuffer(dcursor, dend); |
343 | } |
344 | |
345 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
346 | return false; |
347 | } |
348 | |
349 | mInsideFinishing = true; |
350 | |
351 | // |
352 | // writing out the last mNextbits... |
353 | // |
354 | switch (mStepNo) { |
355 | case 1: // 2 mNextbits waiting to be written. Needs two padding chars: |
356 | case 2: // 4 or 6 mNextbits waiting to be written. Completes a block |
357 | writeBase64(ch: mNextbits, dcursor, dend); |
358 | mNextbits = 0; |
359 | break; |
360 | case 0: // no padding, nothing to be written, except possibly the CRLF |
361 | assert(mNextbits == 0); |
362 | break; |
363 | default: |
364 | assert(0); |
365 | } |
366 | |
367 | // |
368 | // adding padding... |
369 | // |
370 | switch (mStepNo) { |
371 | case 1: |
372 | write(ch: '=', dcursor, dend); |
373 | Q_FALLTHROUGH(); |
374 | // fall through: |
375 | case 2: |
376 | write(ch: '=', dcursor, dend); |
377 | Q_FALLTHROUGH(); |
378 | // fall through: |
379 | case 0: // completed a quartet - add CRLF |
380 | if (withLFatEnd) { |
381 | writeCRLF(dcursor, dend); |
382 | } |
383 | return flushOutputBuffer(dcursor, dend); |
384 | default: |
385 | assert(0); |
386 | } |
387 | return true; // asserts get compiled out |
388 | } |
389 | |
390 | } // namespace KCodecs |
391 | |