1 | // Take a look at the license at the top of the repository in the LICENSE file. |
2 | |
3 | use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr}; |
4 | |
5 | use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice}; |
6 | |
7 | // rustdoc-stripper-ignore-next |
8 | /// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input |
9 | /// string. |
10 | #[derive (Debug)] |
11 | pub enum CvtError { |
12 | Convert(Error), |
13 | IllegalSequence { source: Error, offset: usize }, |
14 | } |
15 | |
16 | impl std::error::Error for CvtError { |
17 | fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> { |
18 | match self { |
19 | CvtError::Convert(err: &Error) => std::error::Error::source(self:err), |
20 | CvtError::IllegalSequence { source: &Error, .. } => Some(source), |
21 | } |
22 | } |
23 | } |
24 | |
25 | impl fmt::Display for CvtError { |
26 | fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result { |
27 | match self { |
28 | CvtError::Convert(err: &Error) => fmt::Display::fmt(self:err, f:fmt), |
29 | CvtError::IllegalSequence { source: &Error, offset: &usize } => { |
30 | write!(fmt, " {source} at offset {offset}" ) |
31 | } |
32 | } |
33 | } |
34 | } |
35 | |
36 | impl std::convert::From<Error> for CvtError { |
37 | fn from(err: Error) -> Self { |
38 | CvtError::Convert(err) |
39 | } |
40 | } |
41 | |
42 | impl CvtError { |
43 | #[inline ] |
44 | fn new(err: Error, bytes_read: usize) -> Self { |
45 | if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) { |
46 | Self::IllegalSequence { |
47 | source: err, |
48 | offset: bytes_read, |
49 | } |
50 | } else { |
51 | err.into() |
52 | } |
53 | } |
54 | } |
55 | |
56 | #[doc (alias = "g_convert" )] |
57 | pub fn convert( |
58 | str_: &[u8], |
59 | to_codeset: impl IntoGStr, |
60 | from_codeset: impl IntoGStr, |
61 | ) -> Result<(Slice<u8>, usize), CvtError> { |
62 | assert!(str_.len() <= isize::MAX as usize); |
63 | let mut bytes_read = 0; |
64 | let mut bytes_written = 0; |
65 | let mut error = ptr::null_mut(); |
66 | let result = to_codeset.run_with_gstr(|to_codeset| { |
67 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
68 | ffi::g_convert( |
69 | str_.as_ptr(), |
70 | str_.len() as isize, |
71 | to_codeset.to_glib_none().0, |
72 | from_codeset.to_glib_none().0, |
73 | &mut bytes_read, |
74 | &mut bytes_written, |
75 | &mut error, |
76 | ) |
77 | }) |
78 | }); |
79 | if result.is_null() { |
80 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
81 | } else { |
82 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
83 | Ok((slice, bytes_read)) |
84 | } |
85 | } |
86 | |
87 | #[doc (alias = "g_convert_with_fallback" )] |
88 | pub fn convert_with_fallback( |
89 | str_: &[u8], |
90 | to_codeset: impl IntoGStr, |
91 | from_codeset: impl IntoGStr, |
92 | fallback: Option<impl IntoGStr>, |
93 | ) -> Result<(Slice<u8>, usize), CvtError> { |
94 | assert!(str_.len() <= isize::MAX as usize); |
95 | let mut bytes_read = 0; |
96 | let mut bytes_written = 0; |
97 | let mut error = ptr::null_mut(); |
98 | let result = to_codeset.run_with_gstr(|to_codeset| { |
99 | from_codeset.run_with_gstr(|from_codeset| { |
100 | fallback.run_with_gstr(|fallback| unsafe { |
101 | ffi::g_convert_with_fallback( |
102 | str_.as_ptr(), |
103 | str_.len() as isize, |
104 | to_codeset.to_glib_none().0, |
105 | from_codeset.to_glib_none().0, |
106 | fallback.to_glib_none().0, |
107 | &mut bytes_read, |
108 | &mut bytes_written, |
109 | &mut error, |
110 | ) |
111 | }) |
112 | }) |
113 | }); |
114 | if result.is_null() { |
115 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
116 | } else { |
117 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
118 | Ok((slice, bytes_read)) |
119 | } |
120 | } |
121 | |
// rustdoc-stripper-ignore-next
/// Error returned by [`IConv::iconv`]: a [`std::io::Error`], optionally paired with an offset
/// into the input buffer.
#[derive(Debug)]
pub enum IConvError {
    // rustdoc-stripper-ignore-next
    /// An OS error without an associated input offset.
    Error(io::Error),
    // rustdoc-stripper-ignore-next
    /// An OS error (`source`) together with the `offset` into the input at which it occurred.
    WithOffset { source: io::Error, offset: usize },
}
129 | |
130 | impl std::error::Error for IConvError { |
131 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
132 | match self { |
133 | IConvError::Error(err: &Error) => std::error::Error::source(self:err), |
134 | IConvError::WithOffset { source: &Error, .. } => Some(source), |
135 | } |
136 | } |
137 | } |
138 | |
139 | impl fmt::Display for IConvError { |
140 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { |
141 | match self { |
142 | IConvError::Error(err: &Error) => fmt::Display::fmt(self:err, f:fmt), |
143 | IConvError::WithOffset { source: &Error, offset: &usize } => write!(fmt, " {source} at offset {offset}" ), |
144 | } |
145 | } |
146 | } |
147 | |
148 | impl std::convert::From<io::Error> for IConvError { |
149 | fn from(err: io::Error) -> Self { |
150 | IConvError::Error(err) |
151 | } |
152 | } |
153 | |
154 | #[derive (Debug)] |
155 | #[repr (transparent)] |
156 | #[doc (alias = "GIConv" )] |
157 | pub struct IConv(ffi::GIConv); |
158 | |
159 | unsafe impl Send for IConv {} |
160 | |
161 | impl IConv { |
162 | #[doc (alias = "g_iconv_open" )] |
163 | #[allow (clippy::unnecessary_lazy_evaluations)] |
164 | pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> { |
165 | let iconv = to_codeset.run_with_gstr(|to_codeset| { |
166 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
167 | ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0) |
168 | }) |
169 | }); |
170 | (iconv as isize != -1).then(|| Self(iconv)) |
171 | } |
172 | #[doc (alias = "g_convert_with_iconv" )] |
173 | pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> { |
174 | assert!(str_.len() <= isize::MAX as usize); |
175 | let mut bytes_read = 0; |
176 | let mut bytes_written = 0; |
177 | let mut error = ptr::null_mut(); |
178 | let result = unsafe { |
179 | ffi::g_convert_with_iconv( |
180 | str_.as_ptr(), |
181 | str_.len() as isize, |
182 | self.0, |
183 | &mut bytes_read, |
184 | &mut bytes_written, |
185 | &mut error, |
186 | ) |
187 | }; |
188 | if result.is_null() { |
189 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
190 | } else { |
191 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
192 | Ok((slice, bytes_read)) |
193 | } |
194 | } |
195 | #[doc (alias = "g_iconv" )] |
196 | pub fn iconv( |
197 | &mut self, |
198 | inbuf: Option<&[u8]>, |
199 | outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>, |
200 | ) -> Result<(usize, usize, usize), IConvError> { |
201 | let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
202 | let mut inbytes_left = input_len; |
203 | let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
204 | let mut inbuf = inbuf |
205 | .map(|b| mut_override(b.as_ptr()) as *mut c_char) |
206 | .unwrap_or_else(ptr::null_mut); |
207 | let mut outbuf = outbuf |
208 | .map(|b| b.as_mut_ptr() as *mut c_char) |
209 | .unwrap_or_else(ptr::null_mut); |
210 | let conversions = unsafe { |
211 | ffi::g_iconv( |
212 | self.0, |
213 | &mut inbuf, |
214 | &mut inbytes_left, |
215 | &mut outbuf, |
216 | &mut outbytes_left, |
217 | ) |
218 | }; |
219 | if conversions as isize == -1 { |
220 | let err = io::Error::last_os_error(); |
221 | let code = err.raw_os_error().unwrap(); |
222 | if code == libc::EILSEQ || code == libc::EINVAL { |
223 | Err(IConvError::WithOffset { |
224 | source: err, |
225 | offset: input_len - inbytes_left, |
226 | }) |
227 | } else { |
228 | Err(err.into()) |
229 | } |
230 | } else { |
231 | Ok((conversions, inbytes_left, outbytes_left)) |
232 | } |
233 | } |
234 | } |
235 | |
236 | impl Drop for IConv { |
237 | #[inline ] |
238 | fn drop(&mut self) { |
239 | unsafe { |
240 | ffi::g_iconv_close(self.0); |
241 | } |
242 | } |
243 | } |
244 | |
245 | #[doc (alias = "g_get_filename_charsets" )] |
246 | #[doc (alias = "get_filename_charsets" )] |
247 | pub fn filename_charsets() -> (bool, Vec<GString>) { |
248 | let mut filename_charsets: *mut *const {unknown} = ptr::null_mut(); |
249 | unsafe { |
250 | let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets); |
251 | ( |
252 | from_glib(val:is_utf8), |
253 | FromGlibPtrContainer::from_glib_none(ptr:filename_charsets), |
254 | ) |
255 | } |
256 | } |
257 | |
258 | #[doc (alias = "g_filename_from_utf8" )] |
259 | pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> { |
260 | let mut bytes_read = 0; |
261 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
262 | let mut error = ptr::null_mut(); |
263 | let ret = utf8string.run_with_gstr(|utf8string| { |
264 | assert!(utf8string.len() <= isize::MAX as usize); |
265 | let len = utf8string.len() as isize; |
266 | unsafe { |
267 | ffi::g_filename_from_utf8( |
268 | utf8string.to_glib_none().0, |
269 | len, |
270 | &mut bytes_read, |
271 | bytes_written.as_mut_ptr(), |
272 | &mut error, |
273 | ) |
274 | } |
275 | }); |
276 | if error.is_null() { |
277 | Ok(unsafe { |
278 | ( |
279 | PathBuf::from_glib_full_num(ret, bytes_written.assume_init()), |
280 | bytes_read, |
281 | ) |
282 | }) |
283 | } else { |
284 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
285 | } |
286 | } |
287 | |
288 | #[doc (alias = "g_filename_to_utf8" )] |
289 | pub fn filename_to_utf8( |
290 | opsysstring: impl AsRef<std::path::Path>, |
291 | ) -> Result<(crate::GString, usize), CvtError> { |
292 | let path = opsysstring.as_ref().to_glib_none(); |
293 | let mut bytes_read = 0; |
294 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
295 | let mut error = ptr::null_mut(); |
296 | let ret = unsafe { |
297 | ffi::g_filename_to_utf8( |
298 | path.0, |
299 | path.1.as_bytes().len() as isize, |
300 | &mut bytes_read, |
301 | bytes_written.as_mut_ptr(), |
302 | &mut error, |
303 | ) |
304 | }; |
305 | if error.is_null() { |
306 | Ok(unsafe { |
307 | ( |
308 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
309 | bytes_read, |
310 | ) |
311 | }) |
312 | } else { |
313 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
314 | } |
315 | } |
316 | |
317 | #[doc (alias = "g_locale_from_utf8" )] |
318 | pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> { |
319 | let mut bytes_read = 0; |
320 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
321 | let mut error = ptr::null_mut(); |
322 | let ret = utf8string.run_with_gstr(|utf8string| { |
323 | assert!(utf8string.len() <= isize::MAX as usize); |
324 | unsafe { |
325 | ffi::g_locale_from_utf8( |
326 | utf8string.as_ptr(), |
327 | utf8string.len() as isize, |
328 | &mut bytes_read, |
329 | bytes_written.as_mut_ptr(), |
330 | &mut error, |
331 | ) |
332 | } |
333 | }); |
334 | if error.is_null() { |
335 | Ok(unsafe { |
336 | ( |
337 | Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1), |
338 | bytes_read, |
339 | ) |
340 | }) |
341 | } else { |
342 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
343 | } |
344 | } |
345 | |
346 | #[doc (alias = "g_locale_to_utf8" )] |
347 | pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> { |
348 | let len = opsysstring.len() as isize; |
349 | let mut bytes_read = 0; |
350 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
351 | let mut error = ptr::null_mut(); |
352 | let ret = unsafe { |
353 | ffi::g_locale_to_utf8( |
354 | opsysstring.to_glib_none().0, |
355 | len, |
356 | &mut bytes_read, |
357 | bytes_written.as_mut_ptr(), |
358 | &mut error, |
359 | ) |
360 | }; |
361 | if error.is_null() { |
362 | Ok(unsafe { |
363 | ( |
364 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
365 | bytes_read, |
366 | ) |
367 | }) |
368 | } else { |
369 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
370 | } |
371 | } |
372 | |
373 | #[doc (alias = "g_utf8_to_ucs4" )] |
374 | #[doc (alias = "g_utf8_to_ucs4_fast" )] |
375 | #[doc (alias = "utf8_to_ucs4" )] |
376 | pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> { |
377 | unsafe { |
378 | let mut items_written: i32 = 0; |
379 | |
380 | let str_as_utf32: *mut u32 = ffi::g_utf8_to_ucs4_fast( |
381 | str.as_ref().as_ptr().cast::<c_char>(), |
382 | str.as_ref().len() as _, |
383 | &mut items_written, |
384 | ); |
385 | |
386 | // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us |
387 | // invalid UTF-32 codepoints |
388 | Slice::from_glib_full_num(ptr:str_as_utf32, len:items_written as usize) |
389 | } |
390 | } |
391 | |
392 | #[doc (alias = "g_ucs4_to_utf8" )] |
393 | #[doc (alias = "ucs4_to_utf8" )] |
394 | pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString { |
395 | let mut items_read: i32 = 0; |
396 | let mut items_written: i32 = 0; |
397 | let mut error: *mut GError = ptr::null_mut(); |
398 | |
399 | unsafe { |
400 | let str_as_utf8: *mut {unknown} = ffi::g_ucs4_to_utf8( |
401 | str.as_ref().as_ptr().cast::<u32>(), |
402 | str.as_ref().len() as _, |
403 | &mut items_read, |
404 | &mut items_written, |
405 | &mut error, |
406 | ); |
407 | |
408 | debug_assert!( |
409 | error.is_null(), |
410 | "Rust `char` should always be convertible to UTF-8" |
411 | ); |
412 | |
413 | GString::from_glib_full_num(ptr:str_as_utf8, num:items_written as usize) |
414 | } |
415 | } |
416 | |
417 | #[doc (alias = "g_utf8_casefold" )] |
418 | #[doc (alias = "utf8_casefold" )] |
419 | pub fn casefold(str: impl AsRef<str>) -> GString { |
420 | unsafe { |
421 | let str: *mut {unknown} = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize); |
422 | |
423 | from_glib_full(ptr:str) |
424 | } |
425 | } |
426 | |
427 | #[doc (alias = "g_utf8_normalize" )] |
428 | #[doc (alias = "utf8_normalize" )] |
429 | pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString { |
430 | unsafe { |
431 | let str: *mut {unknown} = ffi::g_utf8_normalize( |
432 | str.as_ref().as_ptr().cast(), |
433 | str.as_ref().len() as isize, |
434 | mode.into_glib(), |
435 | ); |
436 | |
437 | from_glib_full(ptr:str) |
438 | } |
439 | } |
440 | |
#[cfg(test)]
mod tests {
    // Plain ASCII converts cleanly; a non-ASCII byte is rejected; the fallback variants
    // replace the unrepresentable character instead of failing.
    #[test]
    fn convert_ascii() {
        assert!(super::convert(b"Hello", "utf-8", "ascii").is_ok());
        assert!(super::convert(b"He\xaallo", "utf-8", "ascii").is_err());
        assert_eq!(
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap()
                .0
                .as_slice(),
            b"H\\u00e9llo"
        );
        assert_eq!(
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_"))
                .unwrap()
                .0
                .as_slice(),
            b"H_llo"
        );
    }

    // A descriptor can be opened for known charsets and reused; an unknown charset yields
    // `None`.
    #[test]
    fn iconv() {
        let mut conv = super::IConv::new("utf-8", "ascii").unwrap();
        assert!(conv.convert(b"Hello").is_ok());
        assert!(conv.convert(b"He\xaallo").is_err());
        assert!(super::IConv::new("utf-8", "badcharset123456789").is_none());
    }

    // Smoke test: the call must simply not crash.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Round-trips between UTF-8 and UTF-32, including characters outside the BMP.
    #[test]
    fn utf8_and_utf32() {
        let utf32 = ['A', 'b', '🤔'];
        let utf8 = super::utf32_to_utf8(utf32);
        assert_eq!(utf8, "Ab🤔");

        let utf8 = "🤔 ț";
        let utf32 = super::utf8_to_utf32(utf8);
        assert_eq!(utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}
485 | |