| 1 | /* -*- c++ -*- |
| 2 | SPDX-FileCopyrightText: 2001 Marc Mutz <mutz@kde.org> |
| 3 | |
| 4 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 5 | */ |
| 6 | |
| 7 | #include "kcodecsbase64.h" |
| 8 | #include "kcodecs_p.h" |
| 9 | |
| 10 | #include <QDebug> |
| 11 | |
| 12 | #include <cassert> |
| 13 | |
| 14 | using namespace KCodecs; |
| 15 | |
| 16 | namespace KCodecs |
| 17 | { |
| 18 | // codec for base64 as specified in RFC 2045 |
| 19 | // class Base64Codec; |
| 20 | // class Base64Decoder; |
| 21 | // class Base64Encoder; |
| 22 | |
| 23 | // codec for the B encoding as specified in RFC 2047 |
| 24 | // class Rfc2047BEncodingCodec; |
| 25 | // class Rfc2047BEncodingEncoder; |
| 26 | // class Rfc2047BEncodingDecoder; |
| 27 | |
| 28 | //@cond PRIVATE |
| 29 | static const uchar base64DecodeMap[128] = { |
| 30 | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
| 31 | |
| 32 | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64, |
| 33 | |
| 34 | 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, |
| 35 | |
| 36 | 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64}; |
| 37 | |
// Lookup table from a sextet value (0..63) to its base64 alphabet character:
// 'A'..'Z', 'a'..'z', '0'..'9', '+', '/'.
static const char base64EncodeMap[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
};
| 41 | //@endcond |
| 42 | |
| 43 | class Base64Decoder : public Decoder |
| 44 | { |
| 45 | uint mStepNo; |
| 46 | uchar mOutbits; |
| 47 | bool mSawPadding : 1; |
| 48 | |
| 49 | protected: |
| 50 | friend class Base64Codec; |
| 51 | Base64Decoder(Codec::NewlineType newline = Codec::NewlineLF) |
| 52 | : Decoder(newline) |
| 53 | , mStepNo(0) |
| 54 | , mOutbits(0) |
| 55 | , mSawPadding(false) |
| 56 | { |
| 57 | } |
| 58 | |
| 59 | public: |
| 60 | ~Base64Decoder() override |
| 61 | { |
| 62 | } |
| 63 | |
| 64 | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
| 65 | // ### really needs no finishing??? |
| 66 | bool finish(char *&dcursor, const char *const dend) override |
| 67 | { |
| 68 | Q_UNUSED(dcursor); |
| 69 | Q_UNUSED(dend); |
| 70 | return true; |
| 71 | } |
| 72 | }; |
| 73 | |
| 74 | class Base64Encoder : public Encoder |
| 75 | { |
| 76 | uint mStepNo; |
| 77 | /** number of already written base64-quartets on current line */ |
| 78 | uint mWrittenPacketsOnThisLine; |
| 79 | uchar mNextbits; |
| 80 | bool mInsideFinishing : 1; |
| 81 | |
| 82 | protected: |
| 83 | friend class Rfc2047BEncodingCodec; |
| 84 | friend class Rfc2047BEncodingEncoder; |
| 85 | friend class Base64Codec; |
| 86 | Base64Encoder(Codec::NewlineType newline = Codec::NewlineLF) |
| 87 | : Encoder(newline) |
| 88 | , mStepNo(0) |
| 89 | , mWrittenPacketsOnThisLine(0) |
| 90 | , mNextbits(0) |
| 91 | , mInsideFinishing(false) |
| 92 | { |
| 93 | } |
| 94 | |
| 95 | bool generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd); |
| 96 | |
| 97 | public: |
| 98 | ~Base64Encoder() override |
| 99 | { |
| 100 | } |
| 101 | |
| 102 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
| 103 | |
| 104 | bool finish(char *&dcursor, const char *const dend) override; |
| 105 | |
| 106 | protected: |
| 107 | bool writeBase64(uchar ch, char *&dcursor, const char *const dend) |
| 108 | { |
| 109 | return write(ch: base64EncodeMap[ch], dcursor, dend); |
| 110 | } |
| 111 | }; |
| 112 | |
| 113 | class Rfc2047BEncodingEncoder : public Base64Encoder |
| 114 | { |
| 115 | protected: |
| 116 | friend class Rfc2047BEncodingCodec; |
| 117 | Rfc2047BEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF) |
| 118 | : Base64Encoder(newline) |
| 119 | { |
| 120 | } |
| 121 | |
| 122 | public: |
| 123 | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
| 124 | bool finish(char *&dcursor, const char *const dend) override; |
| 125 | }; |
| 126 | |
| 127 | Encoder *Base64Codec::makeEncoder(Codec::NewlineType newline) const |
| 128 | { |
| 129 | return new Base64Encoder(newline); |
| 130 | } |
| 131 | |
| 132 | Decoder *Base64Codec::makeDecoder(Codec::NewlineType newline) const |
| 133 | { |
| 134 | return new Base64Decoder(newline); |
| 135 | } |
| 136 | |
| 137 | Encoder *Rfc2047BEncodingCodec::makeEncoder(Codec::NewlineType newline) const |
| 138 | { |
| 139 | return new Rfc2047BEncodingEncoder(newline); |
| 140 | } |
| 141 | |
| 142 | /********************************************************/ |
| 143 | /********************************************************/ |
| 144 | /********************************************************/ |
| 145 | |
| 146 | bool Base64Decoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
| 147 | { |
| 148 | while (dcursor != dend && scursor != send) { |
| 149 | uchar ch = *scursor++; |
| 150 | uchar value; |
| 151 | |
| 152 | // try converting ch to a 6-bit value: |
| 153 | if (ch < 128) { |
| 154 | value = base64DecodeMap[ch]; |
| 155 | } else { |
| 156 | value = 64; |
| 157 | } |
| 158 | |
| 159 | // ch isn't of the base64 alphabet, check for other significant chars: |
| 160 | if (value >= 64) { |
| 161 | if (ch == '=') { |
| 162 | // padding: |
| 163 | if (mStepNo == 0 || mStepNo == 1) { |
| 164 | if (!mSawPadding) { |
| 165 | // malformed |
| 166 | // qWarning() << "Base64Decoder: unexpected padding" |
| 167 | // "character in input stream"; |
| 168 | } |
| 169 | mSawPadding = true; |
| 170 | break; |
| 171 | } else if (mStepNo == 2) { |
| 172 | // ok, there should be another one |
| 173 | } else if (mStepNo == 3) { |
| 174 | // ok, end of encoded stream |
| 175 | mSawPadding = true; |
| 176 | break; |
| 177 | } |
| 178 | mSawPadding = true; |
| 179 | mStepNo = (mStepNo + 1) % 4; |
| 180 | continue; |
| 181 | } else { |
| 182 | // non-base64 alphabet |
| 183 | continue; |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | if (mSawPadding) { |
| 188 | // qWarning() << "Base64Decoder: Embedded padding character" |
| 189 | // "encountered!"; |
| 190 | return true; |
| 191 | } |
| 192 | |
| 193 | // add the new bits to the output stream and flush full octets: |
| 194 | switch (mStepNo) { |
| 195 | case 0: |
| 196 | mOutbits = value << 2; |
| 197 | break; |
| 198 | case 1: |
| 199 | *dcursor++ = (char)(mOutbits | value >> 4); |
| 200 | mOutbits = value << 4; |
| 201 | break; |
| 202 | case 2: |
| 203 | *dcursor++ = (char)(mOutbits | value >> 2); |
| 204 | mOutbits = value << 6; |
| 205 | break; |
| 206 | case 3: |
| 207 | *dcursor++ = (char)(mOutbits | value); |
| 208 | mOutbits = 0; |
| 209 | break; |
| 210 | default: |
| 211 | assert(0); |
| 212 | } |
| 213 | mStepNo = (mStepNo + 1) % 4; |
| 214 | } |
| 215 | |
| 216 | // return false when caller should call us again: |
| 217 | return scursor == send; |
| 218 | } // Base64Decoder::decode() |
| 219 | |
| 220 | bool Base64Encoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
| 221 | { |
| 222 | const uint maxPacketsPerLine = 76 / 4; |
| 223 | |
| 224 | // detect when the caller doesn't adhere to our rules: |
| 225 | if (mInsideFinishing) { |
| 226 | return true; |
| 227 | } |
| 228 | |
| 229 | while (scursor != send && dcursor != dend) { |
| 230 | // properly empty the output buffer before starting something new: |
| 231 | // ### fixme: we can optimize this away, since the buffer isn't |
| 232 | // written to anyway (most of the time) |
| 233 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
| 234 | return scursor == send; |
| 235 | } |
| 236 | |
| 237 | uchar ch = *scursor++; |
| 238 | // mNextbits // (part of) value of next sextet |
| 239 | |
| 240 | // check for line length; |
| 241 | if (mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine) { |
| 242 | writeCRLF(dcursor, dend); |
| 243 | mWrittenPacketsOnThisLine = 0; |
| 244 | } |
| 245 | |
| 246 | // depending on mStepNo, extract value and mNextbits from the |
| 247 | // octet stream: |
| 248 | switch (mStepNo) { |
| 249 | case 0: |
| 250 | assert(mNextbits == 0); |
| 251 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
| 252 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
| 253 | break; |
| 254 | case 1: |
| 255 | assert((mNextbits & ~0x30) == 0); |
| 256 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
| 257 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
| 258 | break; |
| 259 | case 2: |
| 260 | assert((mNextbits & ~0x3C) == 0); |
| 261 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
| 262 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
| 263 | mNextbits = 0; |
| 264 | mWrittenPacketsOnThisLine++; |
| 265 | break; |
| 266 | default: |
| 267 | assert(0); |
| 268 | } |
| 269 | mStepNo = (mStepNo + 1) % 3; |
| 270 | } |
| 271 | |
| 272 | if (d->outputBufferCursor) { |
| 273 | flushOutputBuffer(dcursor, dend); |
| 274 | } |
| 275 | |
| 276 | return scursor == send; |
| 277 | } |
| 278 | |
| 279 | bool Rfc2047BEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
| 280 | { |
| 281 | // detect when the caller doesn't adhere to our rules: |
| 282 | if (mInsideFinishing) { |
| 283 | return true; |
| 284 | } |
| 285 | |
| 286 | while (scursor != send && dcursor != dend) { |
| 287 | // properly empty the output buffer before starting something new: |
| 288 | // ### fixme: we can optimize this away, since the buffer isn't |
| 289 | // written to anyway (most of the time) |
| 290 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
| 291 | return scursor == send; |
| 292 | } |
| 293 | |
| 294 | uchar ch = *scursor++; |
| 295 | // mNextbits // (part of) value of next sextet |
| 296 | |
| 297 | // depending on mStepNo, extract value and mNextbits from the |
| 298 | // octet stream: |
| 299 | switch (mStepNo) { |
| 300 | case 0: |
| 301 | assert(mNextbits == 0); |
| 302 | writeBase64(ch: ch >> 2, dcursor, dend); // top-most 6 bits -> output |
| 303 | mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits |
| 304 | break; |
| 305 | case 1: |
| 306 | assert((mNextbits & ~0x30) == 0); |
| 307 | writeBase64(ch: mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value |
| 308 | mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits |
| 309 | break; |
| 310 | case 2: |
| 311 | assert((mNextbits & ~0x3C) == 0); |
| 312 | writeBase64(ch: mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value |
| 313 | writeBase64(ch: ch & 0x3F, dcursor, dend); // 0..5 bits -> output |
| 314 | mNextbits = 0; |
| 315 | break; |
| 316 | default: |
| 317 | assert(0); |
| 318 | } |
| 319 | mStepNo = (mStepNo + 1) % 3; |
| 320 | } |
| 321 | |
| 322 | if (d->outputBufferCursor) { |
| 323 | flushOutputBuffer(dcursor, dend); |
| 324 | } |
| 325 | |
| 326 | return scursor == send; |
| 327 | } |
| 328 | |
| 329 | bool Base64Encoder::finish(char *&dcursor, const char *const dend) |
| 330 | { |
| 331 | return generic_finish(dcursor, dend, withLFatEnd: true); |
| 332 | } |
| 333 | |
| 334 | bool Rfc2047BEncodingEncoder::finish(char *&dcursor, const char *const dend) |
| 335 | { |
| 336 | return generic_finish(dcursor, dend, withLFatEnd: false); |
| 337 | } |
| 338 | |
| 339 | bool Base64Encoder::generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd) |
| 340 | { |
| 341 | if (mInsideFinishing) { |
| 342 | return flushOutputBuffer(dcursor, dend); |
| 343 | } |
| 344 | |
| 345 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
| 346 | return false; |
| 347 | } |
| 348 | |
| 349 | mInsideFinishing = true; |
| 350 | |
| 351 | // |
| 352 | // writing out the last mNextbits... |
| 353 | // |
| 354 | switch (mStepNo) { |
| 355 | case 1: // 2 mNextbits waiting to be written. Needs two padding chars: |
| 356 | case 2: // 4 or 6 mNextbits waiting to be written. Completes a block |
| 357 | writeBase64(ch: mNextbits, dcursor, dend); |
| 358 | mNextbits = 0; |
| 359 | break; |
| 360 | case 0: // no padding, nothing to be written, except possibly the CRLF |
| 361 | assert(mNextbits == 0); |
| 362 | break; |
| 363 | default: |
| 364 | assert(0); |
| 365 | } |
| 366 | |
| 367 | // |
| 368 | // adding padding... |
| 369 | // |
| 370 | switch (mStepNo) { |
| 371 | case 1: |
| 372 | write(ch: '=', dcursor, dend); |
| 373 | Q_FALLTHROUGH(); |
| 374 | // fall through: |
| 375 | case 2: |
| 376 | write(ch: '=', dcursor, dend); |
| 377 | Q_FALLTHROUGH(); |
| 378 | // fall through: |
| 379 | case 0: // completed a quartet - add CRLF |
| 380 | if (withLFatEnd) { |
| 381 | writeCRLF(dcursor, dend); |
| 382 | } |
| 383 | return flushOutputBuffer(dcursor, dend); |
| 384 | default: |
| 385 | assert(0); |
| 386 | } |
| 387 | return true; // asserts get compiled out |
| 388 | } |
| 389 | |
| 390 | } // namespace KCodecs |
| 391 | |