1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{
6 translate::*, ConvertError, Error, GString, IntoGStr, IntoOptionalGStr, NormalizeMode, Slice,
7};
8
9// rustdoc-stripper-ignore-next
10/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
11/// string.
12#[derive(thiserror::Error, Debug)]
13pub enum CvtError {
14 #[error(transparent)]
15 Convert(#[from] Error),
16 #[error("{source} at offset {offset}")]
17 IllegalSequence {
18 #[source]
19 source: Error,
20 offset: usize,
21 },
22}
23
24impl CvtError {
25 #[inline]
26 fn new(err: Error, bytes_read: usize) -> Self {
27 if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
28 Self::IllegalSequence {
29 source: err,
30 offset: bytes_read,
31 }
32 } else {
33 err.into()
34 }
35 }
36}
37
38#[doc(alias = "g_convert")]
39pub fn convert(
40 str_: &[u8],
41 to_codeset: impl IntoGStr,
42 from_codeset: impl IntoGStr,
43) -> Result<(Slice<u8>, usize), CvtError> {
44 assert!(str_.len() <= isize::MAX as usize);
45 let mut bytes_read = 0;
46 let mut bytes_written = 0;
47 let mut error = ptr::null_mut();
48 let result = to_codeset.run_with_gstr(|to_codeset| {
49 from_codeset.run_with_gstr(|from_codeset| unsafe {
50 ffi::g_convert(
51 str_.as_ptr(),
52 str_.len() as isize,
53 to_codeset.to_glib_none().0,
54 from_codeset.to_glib_none().0,
55 &mut bytes_read,
56 &mut bytes_written,
57 &mut error,
58 )
59 })
60 });
61 if result.is_null() {
62 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
63 } else {
64 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
65 Ok((slice, bytes_read))
66 }
67}
68
69#[doc(alias = "g_convert_with_fallback")]
70pub fn convert_with_fallback(
71 str_: &[u8],
72 to_codeset: impl IntoGStr,
73 from_codeset: impl IntoGStr,
74 fallback: Option<impl IntoGStr>,
75) -> Result<(Slice<u8>, usize), CvtError> {
76 assert!(str_.len() <= isize::MAX as usize);
77 let mut bytes_read = 0;
78 let mut bytes_written = 0;
79 let mut error = ptr::null_mut();
80 let result = to_codeset.run_with_gstr(|to_codeset| {
81 from_codeset.run_with_gstr(|from_codeset| {
82 fallback.run_with_gstr(|fallback| unsafe {
83 ffi::g_convert_with_fallback(
84 str_.as_ptr(),
85 str_.len() as isize,
86 to_codeset.to_glib_none().0,
87 from_codeset.to_glib_none().0,
88 fallback.to_glib_none().0,
89 &mut bytes_read,
90 &mut bytes_written,
91 &mut error,
92 )
93 })
94 })
95 });
96 if result.is_null() {
97 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
98 } else {
99 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
100 Ok((slice, bytes_read))
101 }
102}
103
104// rustdoc-stripper-ignore-next
105/// A wrapper for [`std::io::Error`] that can hold an offset into an input string.
106#[derive(thiserror::Error, Debug)]
107pub enum IConvError {
108 #[error(transparent)]
109 Error(#[from] io::Error),
110 #[error("{source} at offset {offset}")]
111 WithOffset {
112 #[source]
113 source: io::Error,
114 offset: usize,
115 },
116}
117
118#[derive(Debug)]
119#[repr(transparent)]
120#[doc(alias = "GIConv")]
121pub struct IConv(ffi::GIConv);
122
123unsafe impl Send for IConv {}
124
125impl IConv {
126 #[doc(alias = "g_iconv_open")]
127 #[allow(clippy::unnecessary_lazy_evaluations)]
128 pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
129 let iconv = to_codeset.run_with_gstr(|to_codeset| {
130 from_codeset.run_with_gstr(|from_codeset| unsafe {
131 ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
132 })
133 });
134 (iconv as isize != -1).then(|| Self(iconv))
135 }
136 #[doc(alias = "g_convert_with_iconv")]
137 pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
138 assert!(str_.len() <= isize::MAX as usize);
139 let mut bytes_read = 0;
140 let mut bytes_written = 0;
141 let mut error = ptr::null_mut();
142 let result = unsafe {
143 ffi::g_convert_with_iconv(
144 str_.as_ptr(),
145 str_.len() as isize,
146 self.0,
147 &mut bytes_read,
148 &mut bytes_written,
149 &mut error,
150 )
151 };
152 if result.is_null() {
153 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
154 } else {
155 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
156 Ok((slice, bytes_read))
157 }
158 }
159 #[doc(alias = "g_iconv")]
160 pub fn iconv(
161 &mut self,
162 inbuf: Option<&[u8]>,
163 outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
164 ) -> Result<(usize, usize, usize), IConvError> {
165 let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
166 let mut inbytes_left = input_len;
167 let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
168 let mut inbuf = inbuf
169 .map(|b| mut_override(b.as_ptr()) as *mut c_char)
170 .unwrap_or_else(ptr::null_mut);
171 let mut outbuf = outbuf
172 .map(|b| b.as_mut_ptr() as *mut c_char)
173 .unwrap_or_else(ptr::null_mut);
174 let conversions = unsafe {
175 ffi::g_iconv(
176 self.0,
177 &mut inbuf,
178 &mut inbytes_left,
179 &mut outbuf,
180 &mut outbytes_left,
181 )
182 };
183 if conversions as isize == -1 {
184 let err = io::Error::last_os_error();
185 let code = err.raw_os_error().unwrap();
186 if code == libc::EILSEQ || code == libc::EINVAL {
187 Err(IConvError::WithOffset {
188 source: err,
189 offset: input_len - inbytes_left,
190 })
191 } else {
192 Err(err.into())
193 }
194 } else {
195 Ok((conversions, inbytes_left, outbytes_left))
196 }
197 }
198}
199
200impl Drop for IConv {
201 #[inline]
202 fn drop(&mut self) {
203 unsafe {
204 ffi::g_iconv_close(self.0);
205 }
206 }
207}
208
209#[doc(alias = "g_get_filename_charsets")]
210#[doc(alias = "get_filename_charsets")]
211pub fn filename_charsets() -> (bool, Vec<GString>) {
212 let mut filename_charsets: *mut *const i8 = ptr::null_mut();
213 unsafe {
214 let is_utf8: i32 = ffi::g_get_filename_charsets(&mut filename_charsets);
215 (
216 from_glib(val:is_utf8),
217 FromGlibPtrContainer::from_glib_none(ptr:filename_charsets),
218 )
219 }
220}
221
222#[doc(alias = "g_filename_from_utf8")]
223pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
224 let mut bytes_read = 0;
225 let mut bytes_written = std::mem::MaybeUninit::uninit();
226 let mut error = ptr::null_mut();
227 let ret = utf8string.run_with_gstr(|utf8string| {
228 assert!(utf8string.len() <= isize::MAX as usize);
229 let len = utf8string.len() as isize;
230 unsafe {
231 ffi::g_filename_from_utf8(
232 utf8string.to_glib_none().0,
233 len,
234 &mut bytes_read,
235 bytes_written.as_mut_ptr(),
236 &mut error,
237 )
238 }
239 });
240 if error.is_null() {
241 Ok(unsafe {
242 (
243 PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
244 bytes_read,
245 )
246 })
247 } else {
248 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
249 }
250}
251
252#[doc(alias = "g_filename_to_utf8")]
253pub fn filename_to_utf8(
254 opsysstring: impl AsRef<std::path::Path>,
255) -> Result<(crate::GString, usize), CvtError> {
256 let path = opsysstring.as_ref().to_glib_none();
257 let mut bytes_read = 0;
258 let mut bytes_written = std::mem::MaybeUninit::uninit();
259 let mut error = ptr::null_mut();
260 let ret = unsafe {
261 ffi::g_filename_to_utf8(
262 path.0,
263 path.1.as_bytes().len() as isize,
264 &mut bytes_read,
265 bytes_written.as_mut_ptr(),
266 &mut error,
267 )
268 };
269 if error.is_null() {
270 Ok(unsafe {
271 (
272 GString::from_glib_full_num(ret, bytes_written.assume_init()),
273 bytes_read,
274 )
275 })
276 } else {
277 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
278 }
279}
280
281#[doc(alias = "g_locale_from_utf8")]
282pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
283 let mut bytes_read = 0;
284 let mut bytes_written = std::mem::MaybeUninit::uninit();
285 let mut error = ptr::null_mut();
286 let ret = utf8string.run_with_gstr(|utf8string| {
287 assert!(utf8string.len() <= isize::MAX as usize);
288 unsafe {
289 ffi::g_locale_from_utf8(
290 utf8string.as_ptr(),
291 utf8string.len() as isize,
292 &mut bytes_read,
293 bytes_written.as_mut_ptr(),
294 &mut error,
295 )
296 }
297 });
298 if error.is_null() {
299 Ok(unsafe {
300 (
301 Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
302 bytes_read,
303 )
304 })
305 } else {
306 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
307 }
308}
309
310#[doc(alias = "g_locale_to_utf8")]
311pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
312 let len = opsysstring.len() as isize;
313 let mut bytes_read = 0;
314 let mut bytes_written = std::mem::MaybeUninit::uninit();
315 let mut error = ptr::null_mut();
316 let ret = unsafe {
317 ffi::g_locale_to_utf8(
318 opsysstring.to_glib_none().0,
319 len,
320 &mut bytes_read,
321 bytes_written.as_mut_ptr(),
322 &mut error,
323 )
324 };
325 if error.is_null() {
326 Ok(unsafe {
327 (
328 GString::from_glib_full_num(ret, bytes_written.assume_init()),
329 bytes_read,
330 )
331 })
332 } else {
333 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
334 }
335}
336
337#[doc(alias = "g_utf8_to_ucs4")]
338#[doc(alias = "g_utf8_to_ucs4_fast")]
339#[doc(alias = "utf8_to_ucs4")]
340pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
341 unsafe {
342 let mut items_written: i64 = 0;
343
344 let str_as_utf32: *mut u32 = ffi::g_utf8_to_ucs4_fast(
345 str:str.as_ref().as_ptr().cast::<c_char>(),
346 str.as_ref().len() as _,
347 &mut items_written,
348 );
349
350 // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
351 // invalid UTF-32 codepoints
352 Slice::from_glib_full_num(ptr:str_as_utf32, len:items_written as usize)
353 }
354}
355
356#[doc(alias = "g_ucs4_to_utf8")]
357#[doc(alias = "ucs4_to_utf8")]
358pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
359 let mut items_read: i64 = 0;
360 let mut items_written: i64 = 0;
361 let mut error: *mut GError = ptr::null_mut();
362
363 unsafe {
364 let str_as_utf8: *mut i8 = ffi::g_ucs4_to_utf8(
365 str:str.as_ref().as_ptr().cast::<u32>(),
366 str.as_ref().len() as _,
367 &mut items_read,
368 &mut items_written,
369 &mut error,
370 );
371
372 debug_assert!(
373 error.is_null(),
374 "Rust `char` should always be convertible to UTF-8"
375 );
376
377 GString::from_glib_full_num(ptr:str_as_utf8, num:items_written as usize)
378 }
379}
380
381#[doc(alias = "g_utf8_casefold")]
382#[doc(alias = "utf8_casefold")]
383pub fn casefold(str: impl AsRef<str>) -> GString {
384 unsafe {
385 let str: *mut i8 = ffi::g_utf8_casefold(str:str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
386
387 from_glib_full(ptr:str)
388 }
389}
390
391#[doc(alias = "g_utf8_normalize")]
392#[doc(alias = "utf8_normalize")]
393pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
394 unsafe {
395 let str: *mut i8 = ffi::g_utf8_normalize(
396 str:str.as_ref().as_ptr().cast(),
397 str.as_ref().len() as isize,
398 mode:mode.into_glib(),
399 );
400
401 from_glib_full(ptr:str)
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::{convert, convert_with_fallback, utf32_to_utf8, utf8_to_utf32, IConv};

    #[test]
    fn convert_ascii() {
        // Plain ASCII converts; a byte above the ASCII range is rejected.
        assert!(convert(b"Hello", "utf-8", "ascii").is_ok());
        assert!(convert(b"He\xaallo", "utf-8", "ascii").is_err());

        // Without a fallback string the unrepresentable character comes back escaped.
        let (escaped, _) =
            convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR).unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        // With a fallback string the character is replaced by that string.
        let (replaced, _) =
            convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    #[test]
    fn iconv() {
        let mut conv = IConv::new("utf-8", "ascii").unwrap();
        assert!(conv.convert(b"Hello").is_ok());
        assert!(conv.convert(b"He\xaallo").is_err());

        // An unknown character set name must not produce a descriptor.
        assert!(IConv::new("utf-8", "badcharset123456789").is_none());
    }

    #[test]
    fn filename_charsets() {
        // Smoke test: only checks that the call returns.
        let _ = super::filename_charsets();
    }

    #[test]
    fn utf8_and_utf32() {
        let chars = ['A', 'b', '🤔'];
        let as_utf8 = utf32_to_utf8(chars);
        assert_eq!(as_utf8, "Ab🤔");

        let text = "🤔 ț";
        let as_utf32 = utf8_to_utf32(text);
        assert_eq!(as_utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}
449