gbase64.c source code [gtk/subprojects/glib/glib/gbase64.c]

1	/ gbase64.c - Base64 encoding/decoding*
2	*
3	* Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
4	* Copyright (C) 2000-2003 Ximian Inc.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2.1 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public License
17	* along with this library; if not, see <http://www.gnu.org/licenses/>.
18	*
19	* This is based on code in camel, written by:
20	* Michael Zucchi <notzed@ximian.com>
21	* Jeffrey Stedfast <fejj@ximian.com>
22	*/
23
24	#include "config.h"
25
26	#include <string.h>
27
28	#include "gbase64.h"
29	#include "gtestutils.h"
30	#include "glibintl.h"
31
32
33	/**
34	* SECTION:base64
35	* @title: Base64 Encoding
36	* @short_description: encodes and decodes data in Base64 format
37	*
38	* Base64 is an encoding that allows a sequence of arbitrary bytes to be
39	* encoded as a sequence of printable ASCII characters. For the definition
40	* of Base64, see
41	* [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt)
42	* or
43	* [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt).
44	* Base64 is most commonly used as a MIME transfer encoding
45	* for email.
46	*
47	* GLib supports incremental encoding using g_base64_encode_step() and
48	* g_base64_encode_close(). Incremental decoding can be done with
49	* g_base64_decode_step(). To encode or decode data in one go, use
50	* g_base64_encode() or g_base64_decode(). To avoid memory allocation when
51	* decoding, you can use g_base64_decode_inplace().
52	*
53	* Support for Base64 encoding has been added in GLib 2.12.
54	*/
55
/* The 64 output symbols of the Base64 alphabet, indexed by 6-bit value:
 * 0-25 map to 'A'-'Z', 26-51 to 'a'-'z', 52-61 to '0'-'9', 62 to '+' and
 * 63 to '/'.  The array also carries the implicit trailing NUL of the
 * string literal. */
static const char base64_alphabet[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";
58
59	/**
60	* g_base64_encode_step:
61	* @in: (array length=len) (element-type guint8): the binary data to encode
62	* @len: the length of @in
63	* @break_lines: whether to break long lines
64	* @out: (out) (array) (element-type guint8): pointer to destination buffer
65	* @state: (inout): Saved state between steps, initialize to 0
66	* @save: (inout): Saved state between steps, initialize to 0
67	*
68	* Incrementally encode a sequence of binary data into its Base-64 stringified
69	* representation. By calling this function multiple times you can convert
70	* data in chunks to avoid having to have the full encoded data in memory.
71	*
72	* When all of the data has been converted you must call
73	* g_base64_encode_close() to flush the saved state.
74	*
75	* The output buffer must be large enough to fit all the data that will
76	* be written to it. Due to the way base64 encodes you will need
77	* at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
78	* non-zero state). If you enable line-breaking you will need at least:
79	* ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
80	*
81	* @break_lines is typically used when putting base64-encoded data in emails.
82	* It breaks the lines at 76 columns instead of putting all of the text on
83	* the same line. This avoids problems with long lines in the email system.
84	* Note however that it breaks the lines with `LF` characters, not
85	* `CR LF` sequences, so the result cannot be passed directly to SMTP
86	* or certain other protocols.
87	*
88	* Returns: The number of bytes of output that was written
89	*
90	* Since: 2.12
91	*/
92	gsize
93	g_base64_encode_step (const guchar *in,
94	gsize len,
95	gboolean break_lines,
96	gchar *out,
97	gint *state,
98	gint *save)
99	{
100	char *outptr;
101	const guchar *inptr;
102
103	g_return_val_if_fail (in != NULL \|\| len == `0`, `0`);
104	g_return_val_if_fail (out != NULL, `0`);
105	g_return_val_if_fail (state != NULL, `0`);
106	g_return_val_if_fail (save != NULL, `0`);
107
108	if (len == `0`)
109	return `0`;
110
111	inptr = in;
112	outptr = out;
113
114	if (len + ((char *) save) [`0`] > `2`)
115	{
116	const guchar *inend = in+len-`2`;
117	int c1, c2, c3;
118	int already;
119
120	already = *state;
121
122	switch (((char *) save) [`0`])
123	{
124	case `1`:
125	c1 = ((unsigned char *) save) [`1`];
126	goto skip1;
127	case `2`:
128	c1 = ((unsigned char *) save) [`1`];
129	c2 = ((unsigned char *) save) [`2`];
130	goto skip2;
131	}
132
133	/*
134	* yes, we jump into the loop, no i'm not going to change it,
135	* it's beautiful!
136	*/
137	while (inptr < inend)
138	{
139	c1 = *inptr++;
140	skip1:
141	c2 = *inptr++;
142	skip2:
143	c3 = *inptr++;
144	*outptr++ = base64_alphabet [ c1 >> `2` ];
145	*outptr++ = base64_alphabet [ c2 >> `4` \|
146	((c1&`0x3`) << `4`) ];
147	*outptr++ = base64_alphabet [ ((c2 &`0x0f`) << `2`) \|
148	(c3 >> `6`) ];
149	*outptr++ = base64_alphabet [ c3 & `0x3f` ];
150	/ this is a bit ugly ... /
151	if (break_lines && (++already) >= `19`)
152	{
153	*outptr++ = `'\n'`;
154	already = `0`;
155	}
156	}
157
158	((char *)save)[`0`] = `0`;
159	len = `2` - (inptr - inend);
160	*state = already;
161	}
162
163	g_assert (len == `0` \|\| len == `1` \|\| len == `2`);
164
165	{
166	char *saveout;
167
168	/ points to the slot for the next char to save /
169	saveout = & (((char )save)[`1`]) + ((char* *)save)[`0`];
170
171	/ len can only be 0 1 or 2 /
172	switch(len)
173	{
174	case `2`:
175	saveout++ = inptr++;
176	G_GNUC_FALLTHROUGH;
177	case `1`:
178	saveout++ = inptr++;
179	}
180	((char *)save)[`0`] += len;
181	}
182
183	return outptr - out;
184	}
185
186	/**
187	* g_base64_encode_close:
188	* @break_lines: whether to break long lines
189	* @out: (out) (array) (element-type guint8): pointer to destination buffer
190	* @state: (inout): Saved state from g_base64_encode_step()
191	* @save: (inout): Saved state from g_base64_encode_step()
192	*
193	* Flush the status from a sequence of calls to g_base64_encode_step().
194	*
195	* The output buffer must be large enough to fit all the data that will
196	* be written to it. It will need up to 4 bytes, or up to 5 bytes if
197	* line-breaking is enabled.
198	*
199	* The @out array will not be automatically nul-terminated.
200	*
201	* Returns: The number of bytes of output that was written
202	*
203	* Since: 2.12
204	*/
205	gsize
206	g_base64_encode_close (gboolean break_lines,
207	gchar *out,
208	gint *state,
209	gint *save)
210	{
211	int c1, c2;
212	char *outptr = out;
213
214	g_return_val_if_fail (out != NULL, `0`);
215	g_return_val_if_fail (state != NULL, `0`);
216	g_return_val_if_fail (save != NULL, `0`);
217
218	c1 = ((unsigned char *) save) [`1`];
219	c2 = ((unsigned char *) save) [`2`];
220
221	switch (((char *) save) [`0`])
222	{
223	case `2`:
224	outptr [`2`] = base64_alphabet[ ( (c2 &`0x0f`) << `2` ) ];
225	g_assert (outptr [`2`] != `0`);
226	goto skip;
227	case `1`:
228	outptr[`2`] = `'='`;
229	c2 = `0`; / saved state here is not relevant /
230	skip:
231	outptr [`0`] = base64_alphabet [ c1 >> `2` ];
232	outptr [`1`] = base64_alphabet [ c2 >> `4` \| ( (c1&`0x3`) << `4` )];
233	outptr [`3`] = `'='`;
234	outptr += `4`;
235	break;
236	}
237	if (break_lines)
238	*outptr++ = `'\n'`;
239
240	*save = `0`;
241	*state = `0`;
242
243	return outptr - out;
244	}
245
246	/**
247	* g_base64_encode:
248	* @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
249	* @len: the length of @data
250	*
251	* Encode a sequence of binary data into its Base-64 stringified
252	* representation.
253	*
254	* Returns: (transfer full): a newly allocated, zero-terminated Base-64
255	* encoded string representing @data. The returned string must
256	* be freed with g_free().
257	*
258	* Since: 2.12
259	*/
260	gchar *
261	g_base64_encode (const guchar *data,
262	gsize len)
263	{
264	gchar *out;
265	gint state = `0`, outlen;
266	gint save = `0`;
267
268	g_return_val_if_fail (data != NULL \|\| len == `0`, NULL);
269
270	/ We can use a smaller limit here, since we know the saved state is 0,*
271	+1 is needed for trailing \0, also check for unlikely integer overflow /*
272	g_return_val_if_fail (len < ((G_MAXSIZE - `1`) / `4` - `1`) * `3`, NULL);
273
274	out = g_malloc (n_bytes: (len / `3` + `1`) * `4` + `1`);
275
276	outlen = g_base64_encode_step (in: data, len, FALSE, out, state: &state, save: &save);
277	outlen += g_base64_encode_close (FALSE, out: out + outlen, state: &state, save: &save);
278	out[outlen] = `'\0'`;
279
280	return (gchar *) out;
281	}
282
/* Reverse lookup table for decoding: maps each possible input byte to its
 * 6-bit Base64 value.  Bytes outside the alphabet map to 255 (0xff) and
 * are skipped by the decoder.  The padding character '=' maps to 0 so that
 * it still counts as a position within a 4-character group. */
static const unsigned char mime_base64_rank[256] = {
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
  255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
301
302	/**
303	* g_base64_decode_step: (skip)
304	* @in: (array length=len) (element-type guint8): binary input data
305	* @len: max length of @in data to decode
306	* @out: (out caller-allocates) (array) (element-type guint8): output buffer
307	* @state: (inout): Saved state between steps, initialize to 0
308	* @save: (inout): Saved state between steps, initialize to 0
309	*
310	* Incrementally decode a sequence of binary data from its Base-64 stringified
311	* representation. By calling this function multiple times you can convert
312	* data in chunks to avoid having to have the full encoded data in memory.
313	*
314	* The output buffer must be large enough to fit all the data that will
315	* be written to it. Since base64 encodes 3 bytes in 4 chars you need
316	* at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
317	* state).
318	*
319	* Returns: The number of bytes of output that was written
320	*
321	* Since: 2.12
322	**/
323	gsize
324	g_base64_decode_step (const gchar *in,
325	gsize len,
326	guchar *out,
327	gint *state,
328	guint *save)
329	{
330	const guchar *inptr;
331	guchar *outptr;
332	const guchar *inend;
333	guchar c, rank;
334	guchar last[`2`];
335	unsigned int v;
336	int i;
337
338	g_return_val_if_fail (in != NULL \|\| len == `0`, `0`);
339	g_return_val_if_fail (out != NULL, `0`);
340	g_return_val_if_fail (state != NULL, `0`);
341	g_return_val_if_fail (save != NULL, `0`);
342
343	if (len == `0`)
344	return `0`;
345
346	inend = (const guchar *)in+len;
347	outptr = out;
348
349	/ convert 4 base64 bytes to 3 normal bytes /
350	v=*save;
351	i=*state;
352
353	last[`0`] = last[`1`] = `0`;
354
355	/ we use the sign in the state to determine if we got a padding character*
356	in the previous sequence /*
357	if (i < `0`)
358	{
359	i = -i;
360	last[`0`] = `'='`;
361	}
362
363	inptr = (const guchar *)in;
364	while (inptr < inend)
365	{
366	c = *inptr++;
367	rank = mime_base64_rank [c];
368	if (rank != `0xff`)
369	{
370	last[`1`] = last[`0`];
371	last[`0`] = c;
372	v = (v<<`6`) \| rank;
373	i++;
374	if (i==`4`)
375	{
376	*outptr++ = v>>`16`;
377	if (last[`1`] != `'='`)
378	*outptr++ = v>>`8`;
379	if (last[`0`] != `'='`)
380	*outptr++ = v;
381	i=`0`;
382	}
383	}
384	}
385
386	*save = v;
387	*state = last[`0`] == `'='` ? -i : i;
388
389	return outptr - out;
390	}
391
392	/**
393	* g_base64_decode:
394	* @text: (not nullable): zero-terminated string with base64 text to decode
395	* @out_len: (out): The length of the decoded data is written here
396	*
397	* Decode a sequence of Base-64 encoded text into binary data. Note
398	* that the returned binary data is not necessarily zero-terminated,
399	* so it should not be used as a character string.
400	*
401	* Returns: (transfer full) (array length=out_len) (element-type guint8):
402	* newly allocated buffer containing the binary data
403	* that @text represents. The returned buffer must
404	* be freed with g_free().
405	*
406	* Since: 2.12
407	*/
408	guchar *
409	g_base64_decode (const gchar *text,
410	gsize *out_len)
411	{
412	guchar *ret;
413	gsize input_length;
414	gint state = `0`;
415	guint save = `0`;
416
417	g_return_val_if_fail (text != NULL, NULL);
418	g_return_val_if_fail (out_len != NULL, NULL);
419
420	input_length = strlen (s: text);
421
422	/ We can use a smaller limit here, since we know the saved state is 0,*
423	+1 used to avoid calling g_malloc0(0), and hence returning NULL /*
424	ret = g_malloc0 (n_bytes: (input_length / `4`) * `3` + `1`);
425
426	*out_len = g_base64_decode_step (in: text, len: input_length, out: ret, state: &state, save: &save);
427
428	return ret;
429	}
430
431	/**
432	* g_base64_decode_inplace:
433	* @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
434	* string with base64 text to decode
435	* @out_len: (inout): The length of the decoded data is written here
436	*
437	* Decode a sequence of Base-64 encoded text into binary data
438	* by overwriting the input data.
439	*
440	* Returns: (transfer none): The binary data that @text responds. This pointer
441	* is the same as the input @text.
442	*
443	* Since: 2.20
444	*/
445	guchar *
446	g_base64_decode_inplace (gchar *text,
447	gsize *out_len)
448	{
449	gint input_length, state = `0`;
450	guint save = `0`;
451
452	g_return_val_if_fail (text != NULL, NULL);
453	g_return_val_if_fail (out_len != NULL, NULL);
454
455	input_length = strlen (s: text);
456
457	g_return_val_if_fail (input_length > `1`, NULL);
458
459	out_len = g_base64_decode_step (in: text, len: input_length, out: (guchar ) text, state: &state, save: &save);
460
461	return (guchar *) text;
462	}
463

source code of gtk/subprojects/glib/glib/gbase64.c