1/* GIO - GLib Input, Output and Streaming Library
2 *
3 * Copyright (C) 2009 Red Hat, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 *
18 * Author: Alexander Larsson <alexl@redhat.com>
19 */
20
21#include "config.h"
22
23#include "gcharsetconverter.h"
24
25#include <errno.h>
26
27#include "ginitable.h"
28#include "gioerror.h"
29#include "glibintl.h"
30
31
/* Property identifiers used by set_property/get_property and installed
 * on the class in g_charset_converter_class_init(). */
enum {
  PROP_0,             /* never installed; keeps the real ids non-zero */
  PROP_FROM_CHARSET,  /* "from-charset" */
  PROP_TO_CHARSET,    /* "to-charset" */
  PROP_USE_FALLBACK   /* "use-fallback" */
};
38
39/**
40 * SECTION:gcharsetconverter
41 * @short_description: Convert between charsets
42 * @include: gio/gio.h
43 *
44 * #GCharsetConverter is an implementation of #GConverter based on
45 * GIConv.
46 */
47
/* Interface initializers, defined near the end of the file.  They are
 * declared up front because the type-registration macro below refers
 * to them. */
static void g_charset_converter_iface_init          (GConverterIface *iface);
static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
50
51/**
52 * GCharsetConverter:
53 *
54 * Conversions between character sets.
55 */
struct _GCharsetConverter
{
  GObject parent_instance;

  char *from;               /* owned copy of the "from-charset" property value */
  char *to;                 /* owned copy of the "to-charset" property value */
  GIConv iconv;             /* descriptor opened by the GInitable init function;
                             * NULL is treated as "not initialized" by convert/reset */
  gboolean use_fallback;    /* "use-fallback" property: emit a \XX escape instead
                             * of failing on an invalid input byte */
  guint n_fallback_errors;  /* escapes emitted by convert; zeroed by reset */
};
66
/* Registers GCharsetConverter as a GObject subclass that implements the
 * GConverter and GInitable interfaces. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
			 G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
						g_charset_converter_iface_init);
			 G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
						g_charset_converter_initable_iface_init))
72
73static void
74g_charset_converter_finalize (GObject *object)
75{
76 GCharsetConverter *conv;
77
78 conv = G_CHARSET_CONVERTER (object);
79
80 g_free (mem: conv->from);
81 g_free (mem: conv->to);
82 if (conv->iconv)
83 g_iconv_close (converter: conv->iconv);
84
85 G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
86}
87
88static void
89g_charset_converter_set_property (GObject *object,
90 guint prop_id,
91 const GValue *value,
92 GParamSpec *pspec)
93{
94 GCharsetConverter *conv;
95
96 conv = G_CHARSET_CONVERTER (object);
97
98 switch (prop_id)
99 {
100 case PROP_TO_CHARSET:
101 g_free (mem: conv->to);
102 conv->to = g_value_dup_string (value);
103 break;
104
105 case PROP_FROM_CHARSET:
106 g_free (mem: conv->from);
107 conv->from = g_value_dup_string (value);
108 break;
109
110 case PROP_USE_FALLBACK:
111 conv->use_fallback = g_value_get_boolean (value);
112 break;
113
114 default:
115 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
116 break;
117 }
118
119}
120
121static void
122g_charset_converter_get_property (GObject *object,
123 guint prop_id,
124 GValue *value,
125 GParamSpec *pspec)
126{
127 GCharsetConverter *conv;
128
129 conv = G_CHARSET_CONVERTER (object);
130
131 switch (prop_id)
132 {
133 case PROP_TO_CHARSET:
134 g_value_set_string (value, v_string: conv->to);
135 break;
136
137 case PROP_FROM_CHARSET:
138 g_value_set_string (value, v_string: conv->from);
139 break;
140
141 case PROP_USE_FALLBACK:
142 g_value_set_boolean (value, v_boolean: conv->use_fallback);
143 break;
144
145 default:
146 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
147 break;
148 }
149}
150
151static void
152g_charset_converter_class_init (GCharsetConverterClass *klass)
153{
154 GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
155
156 gobject_class->finalize = g_charset_converter_finalize;
157 gobject_class->get_property = g_charset_converter_get_property;
158 gobject_class->set_property = g_charset_converter_set_property;
159
160 g_object_class_install_property (oclass: gobject_class,
161 property_id: PROP_TO_CHARSET,
162 pspec: g_param_spec_string (name: "to-charset",
163 P_("To Charset"),
164 P_("The character encoding to convert to"),
165 NULL,
166 flags: G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
167 G_PARAM_STATIC_STRINGS));
168 g_object_class_install_property (oclass: gobject_class,
169 property_id: PROP_FROM_CHARSET,
170 pspec: g_param_spec_string (name: "from-charset",
171 P_("From Charset"),
172 P_("The character encoding to convert from"),
173 NULL,
174 flags: G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
175 G_PARAM_STATIC_STRINGS));
176 g_object_class_install_property (oclass: gobject_class,
177 property_id: PROP_USE_FALLBACK,
178 pspec: g_param_spec_boolean (name: "use-fallback",
179 P_("Fallback enabled"),
180 P_("Use fallback (of form \\<hexval>) for invalid bytes"),
181 FALSE,
182 flags: G_PARAM_READWRITE |
183 G_PARAM_CONSTRUCT |
184 G_PARAM_STATIC_STRINGS));
185}
186
/* Instance initializer.  Deliberately empty: the fields are filled in
 * later by set_property and by the GInitable init function. */
static void
g_charset_converter_init (GCharsetConverter *local)
{
}
191
192
193/**
194 * g_charset_converter_new:
195 * @to_charset: destination charset
196 * @from_charset: source charset
197 * @error: #GError for error reporting, or %NULL to ignore.
198 *
199 * Creates a new #GCharsetConverter.
200 *
201 * Returns: a new #GCharsetConverter or %NULL on error.
202 *
203 * Since: 2.24
204 **/
205GCharsetConverter *
206g_charset_converter_new (const gchar *to_charset,
207 const gchar *from_charset,
208 GError **error)
209{
210 GCharsetConverter *conv;
211
212 conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
213 NULL, error,
214 first_property_name: "to-charset", to_charset,
215 "from-charset", from_charset,
216 NULL);
217
218 return conv;
219}
220
221static void
222g_charset_converter_reset (GConverter *converter)
223{
224 GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
225
226 if (conv->iconv == NULL)
227 {
228 g_warning ("Invalid object, not initialized");
229 return;
230 }
231
232 g_iconv (converter: conv->iconv, NULL, NULL, NULL, NULL);
233 conv->n_fallback_errors = 0;
234}
235
236static GConverterResult
237g_charset_converter_convert (GConverter *converter,
238 const void *inbuf,
239 gsize inbuf_size,
240 void *outbuf,
241 gsize outbuf_size,
242 GConverterFlags flags,
243 gsize *bytes_read,
244 gsize *bytes_written,
245 GError **error)
246{
247 GCharsetConverter *conv;
248 gsize res;
249 GConverterResult ret;
250 gchar *inbufp, *outbufp;
251 gsize in_left, out_left;
252 int errsv;
253 gboolean reset;
254
255 conv = G_CHARSET_CONVERTER (converter);
256
257 if (conv->iconv == NULL)
258 {
259 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_INITIALIZED,
260 _("Invalid object, not initialized"));
261 return G_CONVERTER_ERROR;
262 }
263
264 inbufp = (char *)inbuf;
265 outbufp = (char *)outbuf;
266 in_left = inbuf_size;
267 out_left = outbuf_size;
268 reset = FALSE;
269
270 /* if there is not input try to flush the data */
271 if (inbuf_size == 0)
272 {
273 if (flags & G_CONVERTER_INPUT_AT_END ||
274 flags & G_CONVERTER_FLUSH)
275 {
276 reset = TRUE;
277 }
278 else
279 {
280 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_PARTIAL_INPUT,
281 _("Incomplete multibyte sequence in input"));
282 return G_CONVERTER_ERROR;
283 }
284 }
285
286 if (reset)
287 /* call g_iconv with NULL inbuf to cleanup shift state */
288 res = g_iconv (converter: conv->iconv,
289 NULL, inbytes_left: &in_left,
290 outbuf: &outbufp, outbytes_left: &out_left);
291 else
292 res = g_iconv (converter: conv->iconv,
293 inbuf: &inbufp, inbytes_left: &in_left,
294 outbuf: &outbufp, outbytes_left: &out_left);
295
296 *bytes_read = inbufp - (char *)inbuf;
297 *bytes_written = outbufp - (char *)outbuf;
298
299 /* Don't report error if we converted anything */
300 if (res == (gsize) -1 && *bytes_read == 0)
301 {
302 errsv = errno;
303
304 switch (errsv)
305 {
306 case EINVAL:
307 /* Incomplete input text */
308 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_PARTIAL_INPUT,
309 _("Incomplete multibyte sequence in input"));
310 break;
311
312 case E2BIG:
313 /* Not enough destination space */
314 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NO_SPACE,
315 _("Not enough space in destination"));
316 break;
317
318 case EILSEQ:
319 /* Invalid code sequence */
320 if (conv->use_fallback)
321 {
322 if (outbuf_size < 3)
323 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NO_SPACE,
324 _("Not enough space in destination"));
325 else
326 {
327 const char hex[] = "0123456789ABCDEF";
328 guint8 v = *(guint8 *)inbuf;
329 guint8 *out = (guint8 *)outbuf;
330 out[0] = '\\';
331 out[1] = hex[(v & 0xf0) >> 4];
332 out[2] = hex[(v & 0x0f) >> 0];
333 *bytes_read = 1;
334 *bytes_written = 3;
335 in_left--;
336 conv->n_fallback_errors++;
337 goto ok;
338 }
339 }
340 else
341 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_INVALID_DATA,
342 _("Invalid byte sequence in conversion input"));
343 break;
344
345 default:
346 g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_FAILED,
347 _("Error during conversion: %s"),
348 g_strerror (errnum: errsv));
349 break;
350 }
351 ret = G_CONVERTER_ERROR;
352 }
353 else
354 {
355 ok:
356 ret = G_CONVERTER_CONVERTED;
357
358 if (reset &&
359 (flags & G_CONVERTER_INPUT_AT_END))
360 ret = G_CONVERTER_FINISHED;
361 else if (reset &&
362 (flags & G_CONVERTER_FLUSH))
363 ret = G_CONVERTER_FLUSHED;
364 }
365
366 return ret;
367}
368
369/**
370 * g_charset_converter_set_use_fallback:
371 * @converter: a #GCharsetConverter
372 * @use_fallback: %TRUE to use fallbacks
373 *
374 * Sets the #GCharsetConverter:use-fallback property.
375 *
376 * Since: 2.24
377 */
378void
379g_charset_converter_set_use_fallback (GCharsetConverter *converter,
380 gboolean use_fallback)
381{
382 use_fallback = !!use_fallback;
383
384 if (converter->use_fallback != use_fallback)
385 {
386 converter->use_fallback = use_fallback;
387 g_object_notify (G_OBJECT (converter), property_name: "use-fallback");
388 }
389}
390
391/**
392 * g_charset_converter_get_use_fallback:
393 * @converter: a #GCharsetConverter
394 *
395 * Gets the #GCharsetConverter:use-fallback property.
396 *
397 * Returns: %TRUE if fallbacks are used by @converter
398 *
399 * Since: 2.24
400 */
401gboolean
402g_charset_converter_get_use_fallback (GCharsetConverter *converter)
403{
404 return converter->use_fallback;
405}
406
407/**
408 * g_charset_converter_get_num_fallbacks:
409 * @converter: a #GCharsetConverter
410 *
411 * Gets the number of fallbacks that @converter has applied so far.
412 *
413 * Returns: the number of fallbacks that @converter has applied
414 *
415 * Since: 2.24
416 */
417guint
418g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
419{
420 return converter->n_fallback_errors;
421}
422
423static void
424g_charset_converter_iface_init (GConverterIface *iface)
425{
426 iface->convert = g_charset_converter_convert;
427 iface->reset = g_charset_converter_reset;
428}
429
430static gboolean
431g_charset_converter_initable_init (GInitable *initable,
432 GCancellable *cancellable,
433 GError **error)
434{
435 GCharsetConverter *conv;
436 int errsv;
437
438 g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
439
440 conv = G_CHARSET_CONVERTER (initable);
441
442 if (cancellable != NULL)
443 {
444 g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_SUPPORTED,
445 _("Cancellable initialization not supported"));
446 return FALSE;
447 }
448
449 conv->iconv = g_iconv_open (to_codeset: conv->to, from_codeset: conv->from);
450 errsv = errno;
451
452 if (conv->iconv == (GIConv)-1)
453 {
454 if (errsv == EINVAL)
455 g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_SUPPORTED,
456 _("Conversion from character set “%s” to “%s” is not supported"),
457 conv->from, conv->to);
458 else
459 g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_FAILED,
460 _("Could not open converter from “%s” to “%s”"),
461 conv->from, conv->to);
462 return FALSE;
463 }
464
465 return TRUE;
466}
467
/* Fills in the GInitable interface vtable with this type's init function. */
static void
g_charset_converter_initable_iface_init (GInitableIface *iface)
{
  iface->init = g_charset_converter_initable_init;
}
473

/* source code of gtk/subprojects/glib/gio/gcharsetconverter.c */