| 1 | /* |
| 2 | SPDX-FileCopyrightText: 2000-2001 Dawit Alemayehu <adawit@kde.org> |
| 3 | SPDX-FileCopyrightText: 2001 Rik Hemsley (rikkus) <rik@kde.org> |
| 4 | SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org> |
| 5 | |
| 6 | SPDX-License-Identifier: LGPL-2.0-only |
| 7 | |
| 8 | The quoted-printable codec as described in RFC 2045, section 6.7. is by |
| 9 | Rik Hemsley (C) 2001. |
| 10 | */ |
| 11 | |
| 12 | #ifndef KCODECS_H |
| 13 | #define KCODECS_H |
| 14 | |
| 15 | #include <kcodecs_export.h> |
| 16 | |
| 17 | #include <QString> |
| 18 | |
| 19 | #include <memory> |
| 20 | |
| 21 | class QByteArray; |
| 22 | class QIODevice; |
| 23 | |
| 24 | /*! |
| 25 | * \namespace KCodecs |
| 26 | * \inmodule KCodecs |
| 27 | * |
| 28 | * A collection of functions for the most commonly used encoding and |
| 29 | * decoding algorithms. |
| 30 | * |
| 31 | * Currently there is support for encoding |
| 32 | * and decoding input using base64, uu and the quoted-printable |
| 33 | * specifications. |
| 34 | * |
| 35 | * \code |
| 36 | * QByteArray input = "Aladdin:open sesame"; |
| 37 | * QByteArray result = KCodecs::base64Encode(input); |
| 38 | * cout << "Result: " << result.data() << endl; |
| 39 | * \endcode |
| 40 | * |
| 41 | * Output should be |
| 42 | * \badcode |
| 43 | * Result: QWxhZGRpbjpvcGVuIHNlc2FtZQ== |
| 44 | * \endcode |
| 45 | * |
| 46 | * The above example makes use of the convenience functions |
| 47 | * (ones that return the result as a new QByteArray) to encode/decode |
| 48 | * a string. If what you need is to encode or decode binary data, then |
| 49 | * it is highly recommended that you use the functions that take an input |
| 50 | * and an output QByteArray as arguments. These functions are specifically |
| 51 | * tailored for encoding and decoding binary data. |
| 52 | * |
| 53 | * \brief A collection of commonly used encoding and decoding algorithms. |
| 54 | */ |
| 55 | namespace KCodecs |
| 56 | { |
| 57 | /*! |
| 58 | * Encodes the given data using the quoted-printable algorithm. |
| 59 | * |
| 60 | * \a in the data to be encoded. |
| 61 | * |
| 62 | * \a useCRLF if true (the default), the input data is expected to have |
| 63 | * CRLF line breaks and the output will have CRLF line |
| 64 | * breaks, too. |
| 65 | * |
| 66 | * Returns the quoted-printable encoded data. |
| 67 | */ |
| 68 | KCODECS_EXPORT QByteArray quotedPrintableEncode(QByteArrayView in, bool useCRLF = true); |
| 69 | |
| 70 | /*! |
| 71 | * Encodes the given data using the quoted-printable algorithm. |
| 72 | * |
| 73 | * Use this function if you want the result of the encoding |
| 74 | * to be placed in another array, which cuts down the number |
| 75 | * of copy operations that have to be performed in the process. |
| 76 | * This is also the preferred method for encoding binary data. |
| 77 | * |
| 78 | * \note the output array is first reset and then resized |
| 79 | * appropriately before use, hence, all data stored in the |
| 80 | * output array will be lost. |
| 81 | * |
| 82 | * \a in the data to be encoded. |
| 83 | * |
| 84 | * \a out receives the encoded data. |
| 85 | * |
| 86 | * \a useCRLF if true the input data is expected to have |
| 87 | * CRLF line breaks and the output will have CRLF line |
| 88 | * breaks, too. This overload declares no default value for it. |
| 89 | */ |
| 90 | KCODECS_EXPORT void quotedPrintableEncode(QByteArrayView in, QByteArray &out, bool useCRLF); |
| 91 | |
| 92 | /*! |
| 93 | * Decodes quoted-printable encoded data. |
| 94 | * |
| 95 | * Accepts data with CRLF or standard unix line breaks. |
| 96 | * |
| 97 | * \a in the data to be decoded. |
| 98 | * |
| 99 | * Returns the decoded data. |
| 100 | * |
| 101 | * \since 5.5 |
| 102 | */ |
| 103 | KCODECS_EXPORT QByteArray quotedPrintableDecode(QByteArrayView in); |
| 104 | |
| 105 | /*! |
| 106 | * Decodes quoted-printable encoded data. |
| 107 | * |
| 108 | * Accepts data with CRLF or standard unix line breaks. |
| 109 | * Use this function if you want the result of the decoding |
| 110 | * to be placed in another array, which cuts down the number |
| 111 | * of copy operations that have to be performed in the process. |
| 112 | * This is also the preferred method for decoding encoded |
| 113 | * binary data. |
| 114 | * |
| 115 | * \note the output array is first reset and then resized |
| 116 | * appropriately before use, hence, all data stored in the |
| 117 | * output array will be lost. |
| 118 | * |
| 119 | * \a in the data to be decoded. |
| 120 | * |
| 121 | * \a out receives the decoded data. |
| 122 | */ |
| 123 | KCODECS_EXPORT void quotedPrintableDecode(QByteArrayView in, QByteArray &out); |
| 124 | |
| 125 | /*! |
| 126 | * Decodes the given data using the uudecode algorithm. |
| 127 | * |
| 128 | * Any 'begin' and 'end' lines like those generated by |
| 129 | * the utilities in unix and unix-like operating systems will be |
| 130 | * automatically ignored. |
| 131 | * |
| 132 | * \a in the data to be decoded. |
| 133 | * |
| 134 | * Returns the decoded data. |
| 135 | */ |
| 136 | KCODECS_EXPORT QByteArray uudecode(QByteArrayView in); |
| 137 | |
| 138 | /*! |
| 139 | * Decodes the given data using the uudecode algorithm. |
| 140 | * |
| 141 | * Use this function if you want the result of the decoding |
| 142 | * to be placed in another array, which cuts down the number |
| 143 | * of copy operations that have to be performed in the process. |
| 144 | * This is the preferred method for decoding binary data. |
| 145 | * |
| 146 | * Any 'begin' and 'end' lines like those generated by |
| 147 | * the utilities in unix and unix-like operating systems will be |
| 148 | * automatically ignored. |
| 149 | * |
| 150 | * \note the output array is first reset and then resized |
| 151 | * appropriately before use, hence, all data stored in the |
| 152 | * output array will be lost. |
| 153 | * |
| 154 | * \a in the data to be decoded. |
| 155 | * |
| 156 | * \a out receives the uudecoded data. |
| 157 | */ |
| 158 | KCODECS_EXPORT void uudecode(QByteArrayView in, QByteArray &out); |
| 159 | |
| 160 | /*! |
| 161 | * Encodes the given data using the base64 algorithm. |
| 162 | * |
| 163 | * This overload takes no line-length option; see the overload |
| 164 | * with an \c insertLFs argument for control over the limit of |
| 165 | * 76 characters or less per line specified by RFC 2045. |
| 166 | * |
| 167 | * \a in the data to be encoded. |
| 168 | * |
| 169 | * Returns the base64 encoded data. |
| 170 | * \since 5.5 |
| 171 | */ |
| 172 | KCODECS_EXPORT QByteArray base64Encode(QByteArrayView in); |
| 173 | |
| 174 | /*! |
| 175 | * Encodes the given data using the base64 algorithm. |
| 176 | * |
| 177 | * Use this function if you want the result of the encoding |
| 178 | * to be placed in another array, which cuts down the number |
| 179 | * of copy operations that have to be performed in the process. |
| 180 | * This is also the preferred method for encoding binary data. |
| 181 | * |
| 182 | * The boolean argument determines if the encoded data is going |
| 183 | * to be restricted to 76 characters or less per line as specified |
| 184 | * by RFC 2045. If \a insertLFs is true, then there will be 76 |
| 185 | * characters or less per line. |
| 186 | * |
| 187 | * \note the output array is first reset and then resized |
| 188 | * appropriately before use, hence, all data stored in the |
| 189 | * output array will be lost. |
| 190 | * |
| 191 | * \a in the data to be encoded. |
| 192 | * |
| 193 | * \a out receives the encoded data. |
| 194 | * |
| 195 | * \a insertLFs limit the number of characters per line (default is false). |
| 196 | */ |
| 197 | KCODECS_EXPORT void base64Encode(QByteArrayView in, QByteArray &out, bool insertLFs = false); |
| 198 | |
| 199 | /*! |
| 200 | * Decodes the given data that was encoded using the |
| 201 | * base64 algorithm. |
| 202 | * |
| 203 | * \a in the data to be decoded. |
| 204 | * |
| 205 | * Returns the decoded data. |
| 206 | */ |
| 207 | KCODECS_EXPORT QByteArray base64Decode(QByteArrayView in); |
| 208 | |
| 209 | /*! |
| 210 | * Decodes the given data that was encoded with the base64 |
| 211 | * algorithm. |
| 212 | * |
| 213 | * Use this function if you want the result of the decoding |
| 214 | * to be placed in another array, which cuts down the number |
| 215 | * of copy operations that have to be performed in the process. |
| 216 | * This is also the preferred method for decoding encoded |
| 217 | * binary data. |
| 218 | * |
| 219 | * \note the output array is first reset and then resized |
| 220 | * appropriately before use, hence, all data stored in the |
| 221 | * output array will be lost. |
| 222 | * |
| 223 | * \a in the data to be decoded. |
| 224 | * |
| 225 | * \a out receives the decoded data. |
| 226 | */ |
| 227 | KCODECS_EXPORT void base64Decode(QByteArrayView in, QByteArray &out); |
| 228 | |
| 229 | /*! |
| 230 | * Decodes string \a text according to RFC2047, |
| 231 | * i.e., the construct =?charset?[qb]?encoded?= |
| 232 | * |
| 233 | * \a text the source string. |
| 234 | * |
| 235 | * Returns the decoded string. |
| 236 | */ |
| 237 | KCODECS_EXPORT QString decodeRFC2047String(QStringView text); |
| 238 | |
| 239 | /*! |
| 240 | * Charset options for RFC2047 decoding (the \c option argument of decodeRFC2047String()) |
| 241 | * \since 5.5 |
| 242 | * |
| 243 | * \value NoOption No special option |
| 244 | * \value ForceDefaultCharset Force use of the default charset |
| 245 | */ |
| 246 | enum CharsetOption { |
| 247 | NoOption = 0, |
| 248 | ForceDefaultCharset = 1, |
| 249 | }; |
| 250 | |
| 251 | /*! |
| 252 | * Decodes string \a src according to RFC2047, i.e. the construct |
| 253 | * =?charset?[qb]?encoded?= |
| 254 | * |
| 255 | * \a src the source string. |
| 256 | * |
| 257 | * \a usedCS the name of any detected charset or, in case of multiple |
| 258 | * different ones, "UTF-8" as that of a super charset is |
| 259 | * returned here |
| 260 | * |
| 261 | * \a defaultCS the charset to use in case the detected |
| 262 | * one isn't known to us (default is an empty QByteArray). |
| 263 | * |
| 264 | * \a option options for the decoder (default is NoOption) |
| 265 | * |
| 266 | * Returns the decoded string. |
| 267 | * \since 5.5 |
| 268 | */ |
| 269 | KCODECS_EXPORT QString decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS = QByteArray(), CharsetOption option = NoOption); |
| 270 | |
| 271 | /*! |
| 272 | * Encodes string \a src according to RFC2047 using charset \a charset. |
| 273 | * |
| 274 | * This function also makes commas, quotes and other characters part of the encoded name, for example |
| 275 | * the string "Jöhn Döe" <john@example.com> would be encoded as <encoded word for "Jöhn Döe"> <john@example.com>, |
| 276 | * i.e. the opening and closing quote mark would be part of the encoded word. |
| 277 | * Therefore don't use this function for input strings that contain semantically meaningful characters, |
| 278 | * like the quoting marks in this example. |
| 279 | * |
| 280 | * \a src the source string. |
| 281 | * |
| 282 | * \a charset the charset to use. If it can't encode the string, UTF-8 will be used instead. |
| 283 | * |
| 284 | * Returns the encoded string. |
| 285 | * \since 5.5 |
| 286 | */ |
| 287 | KCODECS_EXPORT QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset); |
| 288 | |
| 289 | /*! |
| 290 | * Decodes the given data that was encoded using the |
| 291 | * base45 codec. |
| 292 | * |
| 293 | * \a in the data to be decoded. |
| 294 | * |
| 295 | * Returns the decoded data. |
| 296 | * |
| 297 | * \since 5.84 |
| 298 | * |
| 299 | * \sa https://datatracker.ietf.org/doc/draft-faltstrom-base45/ |
| 300 | */ |
| 301 | KCODECS_EXPORT QByteArray base45Decode(QByteArrayView in); |
| 302 | |
| 303 | class Encoder; |
| 304 | class EncoderPrivate; |
| 305 | class Decoder; |
| 306 | class DecoderPrivate; |
| 307 | |
| 308 | /*! |
| 309 | \class KCodecs::Codec |
| 310 | \inheaderfile KCodecs |
| 311 | \inmodule KCodecs |
| 312 | |
| 313 | \section1 Glossary: |
| 314 | \section2 MIME: |
| 315 | Multipurpose Internet Mail Extensions or MIME is an |
| 316 | Internet Standard that extends the format of e-mail to support text in |
| 317 | character sets other than US-ASCII, non-text attachments, multi-part message |
| 318 | bodies, and header information in non-ASCII character sets. Virtually all |
| 319 | human-written Internet e-mail and a fairly large proportion of automated |
| 320 | e-mail is transmitted via SMTP in MIME format. Internet e-mail is |
| 321 | so closely associated with the SMTP and MIME standards that it is sometimes |
| 322 | called SMTP/MIME e-mail. The content types defined by MIME standards are |
| 323 | also of growing importance outside of e-mail, such as in communication |
| 324 | protocols like HTTP for the World Wide Web. MIME is also a |
| 325 | fundamental component of communication protocols such as HTTP, which |
| 326 | requires that data be transmitted in the context of e-mail-like messages, |
| 327 | even though the data may not actually be e-mail. |
| 328 | |
| 329 | \section2 Codec: |
| 330 | A program capable of performing encoding and decoding on a digital data |
| 331 | stream. Codecs encode data for storage or encryption and decode it for |
| 332 | viewing or editing. |
| 333 | |
| 334 | \section2 CRLF: |
| 335 | A "Carriage Return (0x0D)" followed by a |
| 336 | "Line Feed (0x0A)", two ASCII control characters used to represent a |
| 337 | newline on some operating systems, notably DOS and Microsoft Windows. |
| 338 | |
| 339 | \section2 LF: |
| 340 | A "Line Feed (0x0A)" ASCII control character used |
| 341 | to represent a newline on some operating systems, notably Unix, Unix-like, |
| 342 | and Linux. |
| 343 | |
| 344 | \brief An abstract base class of codecs for common mail transfer encodings. |
| 345 | |
| 346 | Provides an abstract base class of codecs like base64 and quoted-printable. |
| 347 | Implemented as a singleton. |
| 348 | |
| 349 | \since 5.5 |
| 350 | */ |
| 351 | class KCODECS_EXPORT Codec |
| 352 | { |
| 353 | public: |
| 354 | /*! |
| 355 | * \value NewlineLF Line Feed |
| 356 | * \value NewlineCRLF Carriage Return Line Feed |
| 357 | */ |
| 358 | enum NewlineType { |
| 359 | NewlineLF, |
| 360 | NewlineCRLF, |
| 361 | }; |
| 362 | |
| 363 | /*! |
| 364 | Returns a codec associated with the specified \a name. |
| 365 | |
| 366 | \a name is a valid codec name. Check the result for null before use, as the usage examples below do. |
| 367 | */ |
| 368 | static Codec *codecForName(QByteArrayView name); |
| 369 | |
| 370 | /*! |
| 371 | Computes the maximum size, in characters, needed for the encoding. |
| 372 | |
| 373 | \a insize is the number of input characters to be encoded. |
| 374 | |
| 375 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 376 | |
| 377 | Returns the maximum number of characters in the encoding. |
| 378 | */ |
| 379 | virtual qsizetype maxEncodedSizeFor(qsizetype insize, NewlineType newline = NewlineLF) const = 0; |
| 380 | |
| 381 | /*! |
| 382 | Computes the maximum size, in characters, needed for the decoding. |
| 383 | |
| 384 | \a insize is the number of input characters to be decoded. |
| 385 | |
| 386 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 387 | |
| 388 | Returns the maximum number of characters in the decoding. |
| 389 | */ |
| 390 | virtual qsizetype maxDecodedSizeFor(qsizetype insize, NewlineType newline = NewlineLF) const = 0; |
| 391 | |
| 392 | /*! |
| 393 | Creates the encoder for the codec. |
| 394 | |
| 395 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 396 | |
| 397 | Returns a pointer to an instance of the codec's encoder. NOTE(review): presumably owned by the caller, like decoders - confirm. |
| 398 | */ |
| 399 | virtual Encoder *makeEncoder(NewlineType newline = NewlineLF) const = 0; |
| 400 | |
| 401 | /*! |
| 402 | Creates the decoder for the codec. |
| 403 | |
| 404 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 405 | |
| 406 | Returns a pointer to an instance of the codec's decoder; the KCodecs::Decoder usage example deletes it when done. |
| 407 | */ |
| 408 | virtual Decoder *makeDecoder(NewlineType newline = NewlineLF) const = 0; |
| 409 | |
| 410 | /*! |
| 411 | Convenience wrapper that can be used for small chunks of data |
| 412 | when you can provide a large enough buffer. The default |
| 413 | implementation creates an Encoder and uses it. |
| 414 | |
| 415 | Encodes a chunk of bytes starting at \a scursor and extending to |
| 416 | \a send into the buffer described by \a dcursor and \a dend. |
| 417 | |
| 418 | This function doesn't support chaining of blocks. The returned |
| 419 | block cannot be added to, but you don't need to finalize it, either. |
| 420 | |
| 421 | Example usage (\c in contains the input data): |
| 422 | \code |
| 423 | KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64"); |
| 424 | if (!codec) { |
| 425 | qFatal() << "no base64 codec found!?"; |
| 426 | } |
| 427 | QByteArray out(codec->maxEncodedSizeFor(in.size()), Qt::Uninitialized); // worst-case size of the encoding |
| 428 | const char *iit = in.constBegin(); // scursor is taken as const char *& |
| 429 | QByteArray::Iterator oit = out.begin(); |
| 430 | if (!codec->encode(iit, in.constEnd(), oit, out.end())) { |
| 431 | qDebug() << "output buffer too small"; |
| 432 | return; |
| 433 | } |
| 434 | qDebug() << "Size of encoded data:" << oit - out.begin(); |
| 435 | \endcode |
| 436 | |
| 437 | \a scursor is a pointer to the start of the input buffer. |
| 438 | |
| 439 | \a send is a pointer to the end of the input buffer. |
| 440 | |
| 441 | \a dcursor is a pointer to the start of the output buffer. |
| 442 | |
| 443 | \a dend is a pointer to the end of the output buffer. |
| 444 | |
| 445 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 446 | |
| 447 | Returns false if the encoded data didn't fit into the output buffer; |
| 448 | true otherwise. |
| 449 | */ |
| 450 | virtual bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline = NewlineLF) const; |
| 451 | |
| 452 | /*! |
| 453 | Convenience wrapper that can be used for small chunks of data |
| 454 | when you can provide a large enough buffer. The default |
| 455 | implementation creates a Decoder and uses it. |
| 456 | |
| 457 | Decodes a chunk of bytes starting at \a scursor and extending to |
| 458 | \a send into the buffer described by \a dcursor and \a dend. |
| 459 | |
| 460 | This function doesn't support chaining of blocks. The returned |
| 461 | block cannot be added to, but you don't need to finalize it, either. |
| 462 | |
| 463 | Example usage (\c in contains the input data): |
| 464 | \code |
| 465 | KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64"); |
| 466 | if (!codec) { |
| 467 | qFatal() << "no base64 codec found!?"; |
| 468 | } |
| 469 | QByteArray out(codec->maxDecodedSizeFor(in.size()), Qt::Uninitialized); // worst-case size of the decoding |
| 470 | const char *iit = in.constBegin(); // scursor is taken as const char *& |
| 471 | QByteArray::Iterator oit = out.begin(); |
| 472 | if (!codec->decode(iit, in.constEnd(), oit, out.end())) { |
| 473 | qDebug() << "output buffer too small"; |
| 474 | return; |
| 475 | } |
| 476 | qDebug() << "Size of decoded data:" << oit - out.begin(); |
| 477 | \endcode |
| 478 | |
| 479 | \a scursor is a pointer to the start of the input buffer. |
| 480 | |
| 481 | \a send is a pointer to the end of the input buffer. |
| 482 | |
| 483 | \a dcursor is a pointer to the start of the output buffer. |
| 484 | |
| 485 | \a dend is a pointer to the end of the output buffer. |
| 486 | |
| 487 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 488 | |
| 489 | Returns false if the decoded data didn't fit into the output buffer; |
| 490 | true otherwise. |
| 491 | */ |
| 492 | virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline = NewlineLF) const; |
| 493 | |
| 494 | /*! |
| 495 | Even more convenient, but also a bit slower and more memory |
| 496 | intensive, since it allocates storage for the worst case and then |
| 497 | shrinks the result QByteArray to the actual size again. |
| 498 | |
| 499 | For use with small \a src. Returns the encoded data. |
| 500 | |
| 501 | \a src is the data to encode. |
| 502 | |
| 503 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 504 | */ |
| 505 | QByteArray encode(QByteArrayView src, NewlineType newline = NewlineLF) const; |
| 506 | |
| 507 | /*! |
| 508 | Even more convenient, but also a bit slower and more memory |
| 509 | intensive, since it allocates storage for the worst case and then |
| 510 | shrinks the result QByteArray to the actual size again. |
| 511 | |
| 512 | For use with small \a src. Returns the decoded data. |
| 513 | |
| 514 | \a src is the data to decode. |
| 515 | |
| 516 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 517 | */ |
| 518 | QByteArray decode(QByteArrayView src, NewlineType newline = NewlineLF) const; |
| 519 | |
| 520 | /*! |
| 521 | Returns the name of the encoding. Guaranteed to be lowercase. |
| 522 | */ |
| 523 | virtual const char *name() const = 0; |
| 524 | |
| 525 | virtual ~Codec() |
| 526 | { |
| 527 | } |
| 528 | |
| 529 | protected: |
| 530 | Codec() |
| 531 | { |
| 532 | } |
| 533 | }; |
| 534 | |
| 535 | /*! |
| 536 | \class KCodecs::Decoder |
| 537 | \inheaderfile KCodecs |
| 538 | \inmodule KCodecs |
| 539 | |
| 540 | \brief Stateful CTE decoder class. |
| 541 | |
| 542 | Stateful decoder class, modelled after QTextDecoder. |
| 543 | |
| 544 | \section1 Overview |
| 545 | |
| 546 | KCodecs decoders are designed to be able to process encoded data in |
| 547 | chunks of arbitrary size and to work with output buffers of also |
| 548 | arbitrary size. They maintain any state necessary to go on where |
| 549 | the previous call left off. |
| 550 | |
| 551 | The class consists of only two methods of interest: see decode, |
| 552 | which decodes an input block, and finish, which flushes any |
| 553 | remaining data to the output stream. |
| 554 | |
| 555 | Typically, you will create a decoder instance, call decode as |
| 556 | often as necessary, then call finish (most often a single |
| 557 | call suffices, but it might be that during that call the output |
| 558 | buffer is filled, so you should be prepared to call finish |
| 559 | as often as necessary, i.e. until it returns \c true). |
| 560 | |
| 561 | \section1 Return Values |
| 562 | |
| 563 | Both methods return \c true to indicate that they've finished their |
| 564 | job. For decode, a return value of \c true means that the |
| 565 | current input block has been finished (\c false most often means |
| 566 | that the output buffer is full, but that isn't required |
| 567 | behavior. The decode call is free to return at arbitrary |
| 568 | times during processing). |
| 569 | |
| 570 | For finish, a return value of \c true means that all data |
| 571 | implicitly or explicitly stored in the decoder instance has been |
| 572 | flushed to the output buffer. A \c false return value should be |
| 573 | interpreted as "check if the output buffer is full and call me |
| 574 | again", just as with decode. |
| 575 | |
| 576 | \section1 Usage Pattern |
| 577 | |
| 578 | Since the decoder maintains state, you can only use it once. After |
| 579 | a sequence of input blocks has been processed, you finalize |
| 580 | the output and then delete the decoder instance. If you want to |
| 581 | process another input block sequence, you create a new instance. |
| 582 | |
| 583 | Typical usage (\c in contains the (base64-encoded) input data), |
| 584 | taking into account all the conventions detailed above: |
| 585 | |
| 586 | \code |
| 587 | KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64"); |
| 588 | if (!codec) { |
| 589 | qFatal() << "No codec found for base64!"; |
| 590 | } |
| 591 | KCodecs::Decoder *dec = codec->makeDecoder(); |
| 592 | Q_ASSERT(dec); // should not happen |
| 593 | QByteArray out(256, Qt::Uninitialized); // small buffer is enough ;-) |
| 594 | const char *iit = in.constBegin(); // scursor is taken as const char *& |
| 595 | QByteArray::Iterator oit = out.begin(); |
| 596 | // decode the chunk |
| 597 | while (!dec->decode(iit, in.constEnd(), oit, out.end())) |
| 598 | if (oit == out.end()) { // output buffer full, process contents |
| 599 | do_something_with(out); |
| 600 | oit = out.begin(); |
| 601 | } |
| 602 | // repeat while loop for each input block |
| 603 | // ... |
| 604 | // finish (flush remaining data from decoder): |
| 605 | while (!dec->finish(oit, out.end())) |
| 606 | if (oit == out.end()) { // output buffer full, process contents |
| 607 | do_something_with(out); |
| 608 | oit = out.begin(); |
| 609 | } |
| 610 | // now process last chunk: |
| 611 | out.resize(oit - out.begin()); |
| 612 | do_something_with(out); |
| 613 | // _delete_ the decoder, but not the codec: |
| 614 | delete dec; |
| 615 | \endcode |
| 616 | |
| 617 | \since 5.5 |
| 618 | */ |
| 619 | class KCODECS_EXPORT Decoder |
| 620 | { |
| 621 | protected: |
| 622 | friend class Codec; |
| 623 | friend class DecoderPrivate; |
| 624 | |
| 625 | /*! |
| 626 | Protected constructor. Use KCodecs::Codec::makeDecoder to create an |
| 627 | instance. Declared explicit, matching the KCodecs::Encoder constructor. |
| 628 | |
| 629 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 630 | */ |
| 631 | explicit Decoder(Codec::NewlineType newline = Codec::NewlineLF); |
| 632 | |
| 633 | public: |
| 634 | virtual ~Decoder(); |
| 635 | |
| 636 | /*! |
| 637 | Decodes a chunk of data, maintaining state information between |
| 638 | calls. See the class documentation for calling conventions. |
| 639 | |
| 640 | \a scursor is a pointer to the start of the input buffer. |
| 641 | |
| 642 | \a send is a pointer to the end of the input buffer. |
| 643 | |
| 644 | \a dcursor is a pointer to the start of the output buffer. |
| 645 | |
| 646 | \a dend is a pointer to the end of the output buffer. |
| 647 | |
| 648 | Returns true when the current input block has been finished; false most often means the output buffer is full. |
| 649 | */ |
| 650 | virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) = 0; |
| 651 | |
| 652 | /*! |
| 653 | Call this method to finalize the output stream. Writes all |
| 654 | remaining data and resets the decoder. See the class documentation for |
| 655 | calling conventions. |
| 656 | |
| 657 | \a dcursor is a pointer to the start of the output buffer. |
| 658 | |
| 659 | \a dend is a pointer to the end of the output buffer. |
| 660 | |
| 661 | Returns true when all stored data has been flushed to the output buffer; on false, check if the buffer is full and call again. |
| 662 | */ |
| 663 | virtual bool finish(char *&dcursor, const char *const dend) = 0; |
| 664 | |
| 665 | protected: |
| 666 | //@cond PRIVATE |
| 667 | std::unique_ptr<DecoderPrivate> const d; |
| 668 | //@endcond |
| 669 | }; |
| 670 | |
| 671 | /*! |
| 672 | \class KCodecs::Encoder |
| 673 | \inheaderfile KCodecs |
| 674 | \inmodule KCodecs |
| 675 | |
| 676 | \brief Stateful encoder class. |
| 677 | |
| 678 | Stateful encoder class, modeled after QTextEncoder. |
| 679 | |
| 680 | \since 5.5 |
| 681 | */ |
| 682 | class KCODECS_EXPORT Encoder |
| 683 | { |
| 684 | protected: |
| 685 | friend class Codec; |
| 686 | friend class EncoderPrivate; |
| 687 | |
| 688 | /*! |
| 689 | Protected constructor. Use KCodecs::Codec::makeEncoder if you want one. |
| 690 | |
| 691 | \a newline whether to make new lines using CRLF or LF (default is LF). |
| 692 | */ |
| 693 | explicit Encoder(Codec::NewlineType newline = Codec::NewlineLF); |
| 694 | |
| 695 | public: |
| 696 | virtual ~Encoder(); |
| 697 | |
| 698 | /*! |
| 699 | Encodes a chunk of data, maintaining state information between |
| 700 | calls. See KCodecs::Decoder, whose class documentation describes the calling conventions for the parallel decode/finish pair. |
| 701 | |
| 702 | \a scursor is a pointer to the start of the input buffer. |
| 703 | |
| 704 | \a send is a pointer to the end of the input buffer. |
| 705 | |
| 706 | \a dcursor is a pointer to the start of the output buffer. |
| 707 | |
| 708 | \a dend is a pointer to the end of the output buffer. |
| 709 | |
| 710 | Returns true on success. |
| 711 | */ |
| 712 | virtual bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) = 0; |
| 713 | |
| 714 | /*! |
| 715 | Call this method to finalize the output stream. Writes all remaining |
| 716 | data and resets the encoder. See KCodecs::Decoder, whose class documentation describes the calling conventions. |
| 717 | |
| 718 | \a dcursor is a pointer to the start of the output buffer. |
| 719 | |
| 720 | \a dend is a pointer to the end of the output buffer. |
| 721 | |
| 722 | Returns true on success. |
| 723 | */ |
| 724 | virtual bool finish(char *&dcursor, const char *const dend) = 0; |
| 725 | |
| 726 | protected: |
| 727 | /* |
| 728 | The maximum number of characters permitted in the output buffer. |
| 729 | */ |
| 730 | enum { |
| 731 | maxBufferedChars = 8, |
| 732 | }; |
| 733 | |
| 734 | /*! |
| 735 | Writes character \a ch to the output stream or the output buffer, |
| 736 | depending on whether or not the output stream has space left. |
| 737 | |
| 738 | \a ch is the character to write. |
| 739 | |
| 740 | \a dcursor is a pointer to the start of the output buffer. |
| 741 | |
| 742 | \a dend is a pointer to the end of the output buffer. |
| 743 | |
| 744 | Returns true if written to the output stream; else false if buffered. |
| 745 | */ |
| 746 | bool write(char ch, char *&dcursor, const char *const dend); |
| 747 | |
| 748 | /*! |
| 749 | Writes characters from the output buffer to the output stream. |
| 750 | Implementations of encode and finish should call this |
| 751 | at the very beginning and for each iteration of the while loop. |
| 752 | |
| 753 | \a dcursor is a pointer to the start of the output buffer. |
| 754 | |
| 755 | \a dend is a pointer to the end of the output buffer. |
| 756 | |
| 757 | Returns true if all chars could be written, false otherwise. |
| 758 | */ |
| 759 | bool flushOutputBuffer(char *&dcursor, const char *const dend); |
| 760 | |
| 761 | /*! |
| 762 | Convenience function. Outputs LF or CRLF; originally documented as "based on the state of mWithCRLF", |
| 763 | a member not declared in this header - presumably the newline type given to the constructor (TODO confirm). |
| 764 | |
| 765 | \a dcursor is a pointer to the start of the output buffer. |
| 766 | |
| 767 | \a dend is a pointer to the end of the output buffer. |
| 768 | |
| 769 | Returns true on success. |
| 770 | */ |
| 771 | bool writeCRLF(char *&dcursor, const char *const dend); |
| 772 | |
| 773 | protected: |
| 774 | //@cond PRIVATE |
| 775 | std::unique_ptr<EncoderPrivate> const d; |
| 776 | //@endcond |
| 777 | }; |
| 778 | |
| 779 | } // namespace KCodecs |
| 780 | |
| 781 | #endif // KCODECS_H |
| 782 | |