1 | /* gbase64.c - Base64 encoding/decoding |
2 | * |
3 | * Copyright (C) 2006 Alexander Larsson <alexl@redhat.com> |
4 | * Copyright (C) 2000-2003 Ximian Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public License |
17 | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
18 | * |
19 | * This is based on code in camel, written by: |
20 | * Michael Zucchi <notzed@ximian.com> |
21 | * Jeffrey Stedfast <fejj@ximian.com> |
22 | */ |
23 | |
24 | #include "config.h" |
25 | |
26 | #include <string.h> |
27 | |
28 | #include "gbase64.h" |
29 | #include "gtestutils.h" |
30 | #include "glibintl.h" |
31 | |
32 | |
33 | /** |
34 | * SECTION:base64 |
35 | * @title: Base64 Encoding |
36 | * @short_description: encodes and decodes data in Base64 format |
37 | * |
38 | * Base64 is an encoding that allows a sequence of arbitrary bytes to be |
39 | * encoded as a sequence of printable ASCII characters. For the definition |
40 | * of Base64, see |
41 | * [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt) |
42 | * or |
43 | * [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt). |
44 | * Base64 is most commonly used as a MIME transfer encoding |
45 | * for email. |
46 | * |
47 | * GLib supports incremental encoding using g_base64_encode_step() and |
48 | * g_base64_encode_close(). Incremental decoding can be done with |
49 | * g_base64_decode_step(). To encode or decode data in one go, use |
50 | * g_base64_encode() or g_base64_decode(). To avoid memory allocation when |
51 | * decoding, you can use g_base64_decode_inplace(). |
52 | * |
53 | * Support for Base64 encoding has been added in GLib 2.12. |
54 | */ |
55 | |
/* The 64 Base64 output symbols, indexed by the 6-bit value they encode. */
static const char base64_alphabet[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"   /* values  0..25 */
  "abcdefghijklmnopqrstuvwxyz"   /* values 26..51 */
  "0123456789"                   /* values 52..61 */
  "+/";                          /* values 62 and 63 */
58 | |
59 | /** |
60 | * g_base64_encode_step: |
61 | * @in: (array length=len) (element-type guint8): the binary data to encode |
62 | * @len: the length of @in |
63 | * @break_lines: whether to break long lines |
64 | * @out: (out) (array) (element-type guint8): pointer to destination buffer |
65 | * @state: (inout): Saved state between steps, initialize to 0 |
66 | * @save: (inout): Saved state between steps, initialize to 0 |
67 | * |
68 | * Incrementally encode a sequence of binary data into its Base-64 stringified |
69 | * representation. By calling this function multiple times you can convert |
70 | * data in chunks to avoid having to have the full encoded data in memory. |
71 | * |
72 | * When all of the data has been converted you must call |
73 | * g_base64_encode_close() to flush the saved state. |
74 | * |
75 | * The output buffer must be large enough to fit all the data that will |
76 | * be written to it. Due to the way base64 encodes you will need |
77 | * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of |
78 | * non-zero state). If you enable line-breaking you will need at least: |
79 | * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space. |
80 | * |
81 | * @break_lines is typically used when putting base64-encoded data in emails. |
82 | * It breaks the lines at 76 columns instead of putting all of the text on |
83 | * the same line. This avoids problems with long lines in the email system. |
84 | * Note however that it breaks the lines with `LF` characters, not |
85 | * `CR LF` sequences, so the result cannot be passed directly to SMTP |
86 | * or certain other protocols. |
87 | * |
88 | * Returns: The number of bytes of output that was written |
89 | * |
90 | * Since: 2.12 |
91 | */ |
92 | gsize |
93 | g_base64_encode_step (const guchar *in, |
94 | gsize len, |
95 | gboolean break_lines, |
96 | gchar *out, |
97 | gint *state, |
98 | gint *save) |
99 | { |
100 | char *outptr; |
101 | const guchar *inptr; |
102 | |
103 | g_return_val_if_fail (in != NULL || len == 0, 0); |
104 | g_return_val_if_fail (out != NULL, 0); |
105 | g_return_val_if_fail (state != NULL, 0); |
106 | g_return_val_if_fail (save != NULL, 0); |
107 | |
108 | if (len == 0) |
109 | return 0; |
110 | |
111 | inptr = in; |
112 | outptr = out; |
113 | |
114 | if (len + ((char *) save) [0] > 2) |
115 | { |
116 | const guchar *inend = in+len-2; |
117 | int c1, c2, c3; |
118 | int already; |
119 | |
120 | already = *state; |
121 | |
122 | switch (((char *) save) [0]) |
123 | { |
124 | case 1: |
125 | c1 = ((unsigned char *) save) [1]; |
126 | goto skip1; |
127 | case 2: |
128 | c1 = ((unsigned char *) save) [1]; |
129 | c2 = ((unsigned char *) save) [2]; |
130 | goto skip2; |
131 | } |
132 | |
133 | /* |
134 | * yes, we jump into the loop, no i'm not going to change it, |
135 | * it's beautiful! |
136 | */ |
137 | while (inptr < inend) |
138 | { |
139 | c1 = *inptr++; |
140 | skip1: |
141 | c2 = *inptr++; |
142 | skip2: |
143 | c3 = *inptr++; |
144 | *outptr++ = base64_alphabet [ c1 >> 2 ]; |
145 | *outptr++ = base64_alphabet [ c2 >> 4 | |
146 | ((c1&0x3) << 4) ]; |
147 | *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) | |
148 | (c3 >> 6) ]; |
149 | *outptr++ = base64_alphabet [ c3 & 0x3f ]; |
150 | /* this is a bit ugly ... */ |
151 | if (break_lines && (++already) >= 19) |
152 | { |
153 | *outptr++ = '\n'; |
154 | already = 0; |
155 | } |
156 | } |
157 | |
158 | ((char *)save)[0] = 0; |
159 | len = 2 - (inptr - inend); |
160 | *state = already; |
161 | } |
162 | |
163 | g_assert (len == 0 || len == 1 || len == 2); |
164 | |
165 | { |
166 | char *saveout; |
167 | |
168 | /* points to the slot for the next char to save */ |
169 | saveout = & (((char *)save)[1]) + ((char *)save)[0]; |
170 | |
171 | /* len can only be 0 1 or 2 */ |
172 | switch(len) |
173 | { |
174 | case 2: |
175 | *saveout++ = *inptr++; |
176 | G_GNUC_FALLTHROUGH; |
177 | case 1: |
178 | *saveout++ = *inptr++; |
179 | } |
180 | ((char *)save)[0] += len; |
181 | } |
182 | |
183 | return outptr - out; |
184 | } |
185 | |
186 | /** |
187 | * g_base64_encode_close: |
188 | * @break_lines: whether to break long lines |
189 | * @out: (out) (array) (element-type guint8): pointer to destination buffer |
190 | * @state: (inout): Saved state from g_base64_encode_step() |
191 | * @save: (inout): Saved state from g_base64_encode_step() |
192 | * |
193 | * Flush the status from a sequence of calls to g_base64_encode_step(). |
194 | * |
195 | * The output buffer must be large enough to fit all the data that will |
196 | * be written to it. It will need up to 4 bytes, or up to 5 bytes if |
197 | * line-breaking is enabled. |
198 | * |
199 | * The @out array will not be automatically nul-terminated. |
200 | * |
201 | * Returns: The number of bytes of output that was written |
202 | * |
203 | * Since: 2.12 |
204 | */ |
205 | gsize |
206 | g_base64_encode_close (gboolean break_lines, |
207 | gchar *out, |
208 | gint *state, |
209 | gint *save) |
210 | { |
211 | int c1, c2; |
212 | char *outptr = out; |
213 | |
214 | g_return_val_if_fail (out != NULL, 0); |
215 | g_return_val_if_fail (state != NULL, 0); |
216 | g_return_val_if_fail (save != NULL, 0); |
217 | |
218 | c1 = ((unsigned char *) save) [1]; |
219 | c2 = ((unsigned char *) save) [2]; |
220 | |
221 | switch (((char *) save) [0]) |
222 | { |
223 | case 2: |
224 | outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ]; |
225 | g_assert (outptr [2] != 0); |
226 | goto skip; |
227 | case 1: |
228 | outptr[2] = '='; |
229 | c2 = 0; /* saved state here is not relevant */ |
230 | skip: |
231 | outptr [0] = base64_alphabet [ c1 >> 2 ]; |
232 | outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )]; |
233 | outptr [3] = '='; |
234 | outptr += 4; |
235 | break; |
236 | } |
237 | if (break_lines) |
238 | *outptr++ = '\n'; |
239 | |
240 | *save = 0; |
241 | *state = 0; |
242 | |
243 | return outptr - out; |
244 | } |
245 | |
246 | /** |
247 | * g_base64_encode: |
248 | * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode |
249 | * @len: the length of @data |
250 | * |
251 | * Encode a sequence of binary data into its Base-64 stringified |
252 | * representation. |
253 | * |
254 | * Returns: (transfer full): a newly allocated, zero-terminated Base-64 |
255 | * encoded string representing @data. The returned string must |
256 | * be freed with g_free(). |
257 | * |
258 | * Since: 2.12 |
259 | */ |
260 | gchar * |
261 | g_base64_encode (const guchar *data, |
262 | gsize len) |
263 | { |
264 | gchar *out; |
265 | gint state = 0, outlen; |
266 | gint save = 0; |
267 | |
268 | g_return_val_if_fail (data != NULL || len == 0, NULL); |
269 | |
270 | /* We can use a smaller limit here, since we know the saved state is 0, |
271 | +1 is needed for trailing \0, also check for unlikely integer overflow */ |
272 | g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL); |
273 | |
274 | out = g_malloc (n_bytes: (len / 3 + 1) * 4 + 1); |
275 | |
276 | outlen = g_base64_encode_step (in: data, len, FALSE, out, state: &state, save: &save); |
277 | outlen += g_base64_encode_close (FALSE, out: out + outlen, state: &state, save: &save); |
278 | out[outlen] = '\0'; |
279 | |
280 | return (gchar *) out; |
281 | } |
282 | |
/* Maps each input byte to its 6-bit Base64 value. Bytes that are not part
 * of the alphabet map to 0xff. Note that '=' maps to 0 rather than 0xff. */
static const unsigned char mime_base64_rank[256] = {
  /* 0x00 - 0x0f */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x10 - 0x1f */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x20 - 0x2f: '+' is 62, '/' is 63 */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63,
  /* 0x30 - 0x3f: '0'..'9' are 52..61, '=' is 0 */
    52,   53,   54,   55,   56,   57,   58,   59,
    60,   61, 0xff, 0xff, 0xff,    0, 0xff, 0xff,
  /* 0x40 - 0x4f: 'A'..'O' are 0..14 */
  0xff,    0,    1,    2,    3,    4,    5,    6,
     7,    8,    9,   10,   11,   12,   13,   14,
  /* 0x50 - 0x5f: 'P'..'Z' are 15..25 */
    15,   16,   17,   18,   19,   20,   21,   22,
    23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x60 - 0x6f: 'a'..'o' are 26..40 */
  0xff,   26,   27,   28,   29,   30,   31,   32,
    33,   34,   35,   36,   37,   38,   39,   40,
  /* 0x70 - 0x7f: 'p'..'z' are 41..51 */
    41,   42,   43,   44,   45,   46,   47,   48,
    49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x80 - 0x8f */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x90 - 0x9f */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xa0 - 0xaf */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xb0 - 0xbf */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xc0 - 0xcf */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xd0 - 0xdf */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xe0 - 0xef */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xf0 - 0xff */
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
301 | |
302 | /** |
303 | * g_base64_decode_step: (skip) |
304 | * @in: (array length=len) (element-type guint8): binary input data |
305 | * @len: max length of @in data to decode |
306 | * @out: (out caller-allocates) (array) (element-type guint8): output buffer |
307 | * @state: (inout): Saved state between steps, initialize to 0 |
308 | * @save: (inout): Saved state between steps, initialize to 0 |
309 | * |
310 | * Incrementally decode a sequence of binary data from its Base-64 stringified |
311 | * representation. By calling this function multiple times you can convert |
312 | * data in chunks to avoid having to have the full encoded data in memory. |
313 | * |
314 | * The output buffer must be large enough to fit all the data that will |
315 | * be written to it. Since base64 encodes 3 bytes in 4 chars you need |
316 | * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero |
317 | * state). |
318 | * |
319 | * Returns: The number of bytes of output that was written |
320 | * |
321 | * Since: 2.12 |
322 | **/ |
323 | gsize |
324 | g_base64_decode_step (const gchar *in, |
325 | gsize len, |
326 | guchar *out, |
327 | gint *state, |
328 | guint *save) |
329 | { |
330 | const guchar *inptr; |
331 | guchar *outptr; |
332 | const guchar *inend; |
333 | guchar c, rank; |
334 | guchar last[2]; |
335 | unsigned int v; |
336 | int i; |
337 | |
338 | g_return_val_if_fail (in != NULL || len == 0, 0); |
339 | g_return_val_if_fail (out != NULL, 0); |
340 | g_return_val_if_fail (state != NULL, 0); |
341 | g_return_val_if_fail (save != NULL, 0); |
342 | |
343 | if (len == 0) |
344 | return 0; |
345 | |
346 | inend = (const guchar *)in+len; |
347 | outptr = out; |
348 | |
349 | /* convert 4 base64 bytes to 3 normal bytes */ |
350 | v=*save; |
351 | i=*state; |
352 | |
353 | last[0] = last[1] = 0; |
354 | |
355 | /* we use the sign in the state to determine if we got a padding character |
356 | in the previous sequence */ |
357 | if (i < 0) |
358 | { |
359 | i = -i; |
360 | last[0] = '='; |
361 | } |
362 | |
363 | inptr = (const guchar *)in; |
364 | while (inptr < inend) |
365 | { |
366 | c = *inptr++; |
367 | rank = mime_base64_rank [c]; |
368 | if (rank != 0xff) |
369 | { |
370 | last[1] = last[0]; |
371 | last[0] = c; |
372 | v = (v<<6) | rank; |
373 | i++; |
374 | if (i==4) |
375 | { |
376 | *outptr++ = v>>16; |
377 | if (last[1] != '=') |
378 | *outptr++ = v>>8; |
379 | if (last[0] != '=') |
380 | *outptr++ = v; |
381 | i=0; |
382 | } |
383 | } |
384 | } |
385 | |
386 | *save = v; |
387 | *state = last[0] == '=' ? -i : i; |
388 | |
389 | return outptr - out; |
390 | } |
391 | |
392 | /** |
393 | * g_base64_decode: |
394 | * @text: (not nullable): zero-terminated string with base64 text to decode |
395 | * @out_len: (out): The length of the decoded data is written here |
396 | * |
397 | * Decode a sequence of Base-64 encoded text into binary data. Note |
398 | * that the returned binary data is not necessarily zero-terminated, |
399 | * so it should not be used as a character string. |
400 | * |
401 | * Returns: (transfer full) (array length=out_len) (element-type guint8): |
402 | * newly allocated buffer containing the binary data |
403 | * that @text represents. The returned buffer must |
404 | * be freed with g_free(). |
405 | * |
406 | * Since: 2.12 |
407 | */ |
408 | guchar * |
409 | g_base64_decode (const gchar *text, |
410 | gsize *out_len) |
411 | { |
412 | guchar *ret; |
413 | gsize input_length; |
414 | gint state = 0; |
415 | guint save = 0; |
416 | |
417 | g_return_val_if_fail (text != NULL, NULL); |
418 | g_return_val_if_fail (out_len != NULL, NULL); |
419 | |
420 | input_length = strlen (s: text); |
421 | |
422 | /* We can use a smaller limit here, since we know the saved state is 0, |
423 | +1 used to avoid calling g_malloc0(0), and hence returning NULL */ |
424 | ret = g_malloc0 (n_bytes: (input_length / 4) * 3 + 1); |
425 | |
426 | *out_len = g_base64_decode_step (in: text, len: input_length, out: ret, state: &state, save: &save); |
427 | |
428 | return ret; |
429 | } |
430 | |
431 | /** |
432 | * g_base64_decode_inplace: |
433 | * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated |
434 | * string with base64 text to decode |
435 | * @out_len: (inout): The length of the decoded data is written here |
436 | * |
437 | * Decode a sequence of Base-64 encoded text into binary data |
438 | * by overwriting the input data. |
439 | * |
440 | * Returns: (transfer none): The binary data that @text responds. This pointer |
441 | * is the same as the input @text. |
442 | * |
443 | * Since: 2.20 |
444 | */ |
445 | guchar * |
446 | g_base64_decode_inplace (gchar *text, |
447 | gsize *out_len) |
448 | { |
449 | gint input_length, state = 0; |
450 | guint save = 0; |
451 | |
452 | g_return_val_if_fail (text != NULL, NULL); |
453 | g_return_val_if_fail (out_len != NULL, NULL); |
454 | |
455 | input_length = strlen (s: text); |
456 | |
457 | g_return_val_if_fail (input_length > 1, NULL); |
458 | |
459 | *out_len = g_base64_decode_step (in: text, len: input_length, out: (guchar *) text, state: &state, save: &save); |
460 | |
461 | return (guchar *) text; |
462 | } |
463 | |