1 | /* GIO - GLib Input, Output and Streaming Library |
2 | * |
3 | * Copyright (C) 2009 Red Hat, Inc. |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Lesser General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2.1 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Lesser General |
16 | * Public License along with this library; if not, see <http://www.gnu.org/licenses/>. |
17 | * |
18 | * Author: Alexander Larsson <alexl@redhat.com> |
19 | */ |
20 | |
21 | #include "config.h" |
22 | |
23 | #include "gcharsetconverter.h" |
24 | |
25 | #include <errno.h> |
26 | |
27 | #include "ginitable.h" |
28 | #include "gioerror.h" |
29 | #include "glibintl.h" |
30 | |
31 | |
/* Property identifiers used when installing and dispatching the
 * GCharsetConverter properties.  PROP_0 occupies the zero slot and is
 * never installed. */
enum
{
  PROP_0 = 0,
  PROP_FROM_CHARSET,
  PROP_TO_CHARSET,
  PROP_USE_FALLBACK
};
38 | |
39 | /** |
40 | * SECTION:gcharsetconverter |
41 | * @short_description: Convert between charsets |
42 | * @include: gio/gio.h |
43 | * |
44 | * #GCharsetConverter is an implementation of #GConverter based on |
45 | * GIConv. |
46 | */ |
47 | |
48 | static void g_charset_converter_iface_init (GConverterIface *iface); |
49 | static void g_charset_converter_initable_iface_init (GInitableIface *iface); |
50 | |
51 | /** |
52 | * GCharsetConverter: |
53 | * |
54 | * Conversions between character sets. |
55 | */ |
56 | struct _GCharsetConverter |
57 | { |
58 | GObject parent_instance; |
59 | |
60 | char *from; |
61 | char *to; |
62 | GIConv iconv; |
63 | gboolean use_fallback; |
64 | guint n_fallback_errors; |
65 | }; |
66 | |
67 | G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT, |
68 | G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER, |
69 | g_charset_converter_iface_init); |
70 | G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE, |
71 | g_charset_converter_initable_iface_init)) |
72 | |
73 | static void |
74 | g_charset_converter_finalize (GObject *object) |
75 | { |
76 | GCharsetConverter *conv; |
77 | |
78 | conv = G_CHARSET_CONVERTER (object); |
79 | |
80 | g_free (mem: conv->from); |
81 | g_free (mem: conv->to); |
82 | if (conv->iconv) |
83 | g_iconv_close (converter: conv->iconv); |
84 | |
85 | G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object); |
86 | } |
87 | |
88 | static void |
89 | g_charset_converter_set_property (GObject *object, |
90 | guint prop_id, |
91 | const GValue *value, |
92 | GParamSpec *pspec) |
93 | { |
94 | GCharsetConverter *conv; |
95 | |
96 | conv = G_CHARSET_CONVERTER (object); |
97 | |
98 | switch (prop_id) |
99 | { |
100 | case PROP_TO_CHARSET: |
101 | g_free (mem: conv->to); |
102 | conv->to = g_value_dup_string (value); |
103 | break; |
104 | |
105 | case PROP_FROM_CHARSET: |
106 | g_free (mem: conv->from); |
107 | conv->from = g_value_dup_string (value); |
108 | break; |
109 | |
110 | case PROP_USE_FALLBACK: |
111 | conv->use_fallback = g_value_get_boolean (value); |
112 | break; |
113 | |
114 | default: |
115 | G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); |
116 | break; |
117 | } |
118 | |
119 | } |
120 | |
121 | static void |
122 | g_charset_converter_get_property (GObject *object, |
123 | guint prop_id, |
124 | GValue *value, |
125 | GParamSpec *pspec) |
126 | { |
127 | GCharsetConverter *conv; |
128 | |
129 | conv = G_CHARSET_CONVERTER (object); |
130 | |
131 | switch (prop_id) |
132 | { |
133 | case PROP_TO_CHARSET: |
134 | g_value_set_string (value, v_string: conv->to); |
135 | break; |
136 | |
137 | case PROP_FROM_CHARSET: |
138 | g_value_set_string (value, v_string: conv->from); |
139 | break; |
140 | |
141 | case PROP_USE_FALLBACK: |
142 | g_value_set_boolean (value, v_boolean: conv->use_fallback); |
143 | break; |
144 | |
145 | default: |
146 | G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); |
147 | break; |
148 | } |
149 | } |
150 | |
151 | static void |
152 | g_charset_converter_class_init (GCharsetConverterClass *klass) |
153 | { |
154 | GObjectClass *gobject_class = G_OBJECT_CLASS (klass); |
155 | |
156 | gobject_class->finalize = g_charset_converter_finalize; |
157 | gobject_class->get_property = g_charset_converter_get_property; |
158 | gobject_class->set_property = g_charset_converter_set_property; |
159 | |
160 | g_object_class_install_property (oclass: gobject_class, |
161 | property_id: PROP_TO_CHARSET, |
162 | pspec: g_param_spec_string (name: "to-charset" , |
163 | P_("To Charset" ), |
164 | P_("The character encoding to convert to" ), |
165 | NULL, |
166 | flags: G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | |
167 | G_PARAM_STATIC_STRINGS)); |
168 | g_object_class_install_property (oclass: gobject_class, |
169 | property_id: PROP_FROM_CHARSET, |
170 | pspec: g_param_spec_string (name: "from-charset" , |
171 | P_("From Charset" ), |
172 | P_("The character encoding to convert from" ), |
173 | NULL, |
174 | flags: G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | |
175 | G_PARAM_STATIC_STRINGS)); |
176 | g_object_class_install_property (oclass: gobject_class, |
177 | property_id: PROP_USE_FALLBACK, |
178 | pspec: g_param_spec_boolean (name: "use-fallback" , |
179 | P_("Fallback enabled" ), |
180 | P_("Use fallback (of form \\<hexval>) for invalid bytes" ), |
181 | FALSE, |
182 | flags: G_PARAM_READWRITE | |
183 | G_PARAM_CONSTRUCT | |
184 | G_PARAM_STATIC_STRINGS)); |
185 | } |
186 | |
187 | static void |
188 | g_charset_converter_init (GCharsetConverter *local) |
189 | { |
190 | } |
191 | |
192 | |
193 | /** |
194 | * g_charset_converter_new: |
195 | * @to_charset: destination charset |
196 | * @from_charset: source charset |
197 | * @error: #GError for error reporting, or %NULL to ignore. |
198 | * |
199 | * Creates a new #GCharsetConverter. |
200 | * |
201 | * Returns: a new #GCharsetConverter or %NULL on error. |
202 | * |
203 | * Since: 2.24 |
204 | **/ |
205 | GCharsetConverter * |
206 | g_charset_converter_new (const gchar *to_charset, |
207 | const gchar *from_charset, |
208 | GError **error) |
209 | { |
210 | GCharsetConverter *conv; |
211 | |
212 | conv = g_initable_new (G_TYPE_CHARSET_CONVERTER, |
213 | NULL, error, |
214 | first_property_name: "to-charset" , to_charset, |
215 | "from-charset" , from_charset, |
216 | NULL); |
217 | |
218 | return conv; |
219 | } |
220 | |
221 | static void |
222 | g_charset_converter_reset (GConverter *converter) |
223 | { |
224 | GCharsetConverter *conv = G_CHARSET_CONVERTER (converter); |
225 | |
226 | if (conv->iconv == NULL) |
227 | { |
228 | g_warning ("Invalid object, not initialized" ); |
229 | return; |
230 | } |
231 | |
232 | g_iconv (converter: conv->iconv, NULL, NULL, NULL, NULL); |
233 | conv->n_fallback_errors = 0; |
234 | } |
235 | |
236 | static GConverterResult |
237 | g_charset_converter_convert (GConverter *converter, |
238 | const void *inbuf, |
239 | gsize inbuf_size, |
240 | void *outbuf, |
241 | gsize outbuf_size, |
242 | GConverterFlags flags, |
243 | gsize *bytes_read, |
244 | gsize *bytes_written, |
245 | GError **error) |
246 | { |
247 | GCharsetConverter *conv; |
248 | gsize res; |
249 | GConverterResult ret; |
250 | gchar *inbufp, *outbufp; |
251 | gsize in_left, out_left; |
252 | int errsv; |
253 | gboolean reset; |
254 | |
255 | conv = G_CHARSET_CONVERTER (converter); |
256 | |
257 | if (conv->iconv == NULL) |
258 | { |
259 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_INITIALIZED, |
260 | _("Invalid object, not initialized" )); |
261 | return G_CONVERTER_ERROR; |
262 | } |
263 | |
264 | inbufp = (char *)inbuf; |
265 | outbufp = (char *)outbuf; |
266 | in_left = inbuf_size; |
267 | out_left = outbuf_size; |
268 | reset = FALSE; |
269 | |
270 | /* if there is not input try to flush the data */ |
271 | if (inbuf_size == 0) |
272 | { |
273 | if (flags & G_CONVERTER_INPUT_AT_END || |
274 | flags & G_CONVERTER_FLUSH) |
275 | { |
276 | reset = TRUE; |
277 | } |
278 | else |
279 | { |
280 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_PARTIAL_INPUT, |
281 | _("Incomplete multibyte sequence in input" )); |
282 | return G_CONVERTER_ERROR; |
283 | } |
284 | } |
285 | |
286 | if (reset) |
287 | /* call g_iconv with NULL inbuf to cleanup shift state */ |
288 | res = g_iconv (converter: conv->iconv, |
289 | NULL, inbytes_left: &in_left, |
290 | outbuf: &outbufp, outbytes_left: &out_left); |
291 | else |
292 | res = g_iconv (converter: conv->iconv, |
293 | inbuf: &inbufp, inbytes_left: &in_left, |
294 | outbuf: &outbufp, outbytes_left: &out_left); |
295 | |
296 | *bytes_read = inbufp - (char *)inbuf; |
297 | *bytes_written = outbufp - (char *)outbuf; |
298 | |
299 | /* Don't report error if we converted anything */ |
300 | if (res == (gsize) -1 && *bytes_read == 0) |
301 | { |
302 | errsv = errno; |
303 | |
304 | switch (errsv) |
305 | { |
306 | case EINVAL: |
307 | /* Incomplete input text */ |
308 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_PARTIAL_INPUT, |
309 | _("Incomplete multibyte sequence in input" )); |
310 | break; |
311 | |
312 | case E2BIG: |
313 | /* Not enough destination space */ |
314 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NO_SPACE, |
315 | _("Not enough space in destination" )); |
316 | break; |
317 | |
318 | case EILSEQ: |
319 | /* Invalid code sequence */ |
320 | if (conv->use_fallback) |
321 | { |
322 | if (outbuf_size < 3) |
323 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NO_SPACE, |
324 | _("Not enough space in destination" )); |
325 | else |
326 | { |
327 | const char hex[] = "0123456789ABCDEF" ; |
328 | guint8 v = *(guint8 *)inbuf; |
329 | guint8 *out = (guint8 *)outbuf; |
330 | out[0] = '\\'; |
331 | out[1] = hex[(v & 0xf0) >> 4]; |
332 | out[2] = hex[(v & 0x0f) >> 0]; |
333 | *bytes_read = 1; |
334 | *bytes_written = 3; |
335 | in_left--; |
336 | conv->n_fallback_errors++; |
337 | goto ok; |
338 | } |
339 | } |
340 | else |
341 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_INVALID_DATA, |
342 | _("Invalid byte sequence in conversion input" )); |
343 | break; |
344 | |
345 | default: |
346 | g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_FAILED, |
347 | _("Error during conversion: %s" ), |
348 | g_strerror (errnum: errsv)); |
349 | break; |
350 | } |
351 | ret = G_CONVERTER_ERROR; |
352 | } |
353 | else |
354 | { |
355 | ok: |
356 | ret = G_CONVERTER_CONVERTED; |
357 | |
358 | if (reset && |
359 | (flags & G_CONVERTER_INPUT_AT_END)) |
360 | ret = G_CONVERTER_FINISHED; |
361 | else if (reset && |
362 | (flags & G_CONVERTER_FLUSH)) |
363 | ret = G_CONVERTER_FLUSHED; |
364 | } |
365 | |
366 | return ret; |
367 | } |
368 | |
369 | /** |
370 | * g_charset_converter_set_use_fallback: |
371 | * @converter: a #GCharsetConverter |
372 | * @use_fallback: %TRUE to use fallbacks |
373 | * |
374 | * Sets the #GCharsetConverter:use-fallback property. |
375 | * |
376 | * Since: 2.24 |
377 | */ |
378 | void |
379 | g_charset_converter_set_use_fallback (GCharsetConverter *converter, |
380 | gboolean use_fallback) |
381 | { |
382 | use_fallback = !!use_fallback; |
383 | |
384 | if (converter->use_fallback != use_fallback) |
385 | { |
386 | converter->use_fallback = use_fallback; |
387 | g_object_notify (G_OBJECT (converter), property_name: "use-fallback" ); |
388 | } |
389 | } |
390 | |
391 | /** |
392 | * g_charset_converter_get_use_fallback: |
393 | * @converter: a #GCharsetConverter |
394 | * |
395 | * Gets the #GCharsetConverter:use-fallback property. |
396 | * |
397 | * Returns: %TRUE if fallbacks are used by @converter |
398 | * |
399 | * Since: 2.24 |
400 | */ |
401 | gboolean |
402 | g_charset_converter_get_use_fallback (GCharsetConverter *converter) |
403 | { |
404 | return converter->use_fallback; |
405 | } |
406 | |
407 | /** |
408 | * g_charset_converter_get_num_fallbacks: |
409 | * @converter: a #GCharsetConverter |
410 | * |
411 | * Gets the number of fallbacks that @converter has applied so far. |
412 | * |
413 | * Returns: the number of fallbacks that @converter has applied |
414 | * |
415 | * Since: 2.24 |
416 | */ |
417 | guint |
418 | g_charset_converter_get_num_fallbacks (GCharsetConverter *converter) |
419 | { |
420 | return converter->n_fallback_errors; |
421 | } |
422 | |
423 | static void |
424 | g_charset_converter_iface_init (GConverterIface *iface) |
425 | { |
426 | iface->convert = g_charset_converter_convert; |
427 | iface->reset = g_charset_converter_reset; |
428 | } |
429 | |
430 | static gboolean |
431 | g_charset_converter_initable_init (GInitable *initable, |
432 | GCancellable *cancellable, |
433 | GError **error) |
434 | { |
435 | GCharsetConverter *conv; |
436 | int errsv; |
437 | |
438 | g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE); |
439 | |
440 | conv = G_CHARSET_CONVERTER (initable); |
441 | |
442 | if (cancellable != NULL) |
443 | { |
444 | g_set_error_literal (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_SUPPORTED, |
445 | _("Cancellable initialization not supported" )); |
446 | return FALSE; |
447 | } |
448 | |
449 | conv->iconv = g_iconv_open (to_codeset: conv->to, from_codeset: conv->from); |
450 | errsv = errno; |
451 | |
452 | if (conv->iconv == (GIConv)-1) |
453 | { |
454 | if (errsv == EINVAL) |
455 | g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_NOT_SUPPORTED, |
456 | _("Conversion from character set “%s” to “%s” is not supported" ), |
457 | conv->from, conv->to); |
458 | else |
459 | g_set_error (err: error, G_IO_ERROR, code: G_IO_ERROR_FAILED, |
460 | _("Could not open converter from “%s” to “%s”" ), |
461 | conv->from, conv->to); |
462 | return FALSE; |
463 | } |
464 | |
465 | return TRUE; |
466 | } |
467 | |
468 | static void |
469 | g_charset_converter_initable_iface_init (GInitableIface *iface) |
470 | { |
471 | iface->init = g_charset_converter_initable_init; |
472 | } |
473 | |