1 | // Take a look at the license at the top of the repository in the LICENSE file. |
2 | |
3 | use std::{io, os::raw::c_char, path::PathBuf, ptr}; |
4 | |
5 | use crate::{ |
6 | translate::*, ConvertError, Error, GString, IntoGStr, IntoOptionalGStr, NormalizeMode, Slice, |
7 | }; |
8 | |
9 | // rustdoc-stripper-ignore-next |
10 | /// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input |
11 | /// string. |
12 | #[derive (thiserror::Error, Debug)] |
13 | pub enum CvtError { |
14 | #[error(transparent)] |
15 | Convert(#[from] Error), |
16 | #[error("{source} at offset {offset}" )] |
17 | IllegalSequence { |
18 | #[source] |
19 | source: Error, |
20 | offset: usize, |
21 | }, |
22 | } |
23 | |
24 | impl CvtError { |
25 | #[inline ] |
26 | fn new(err: Error, bytes_read: usize) -> Self { |
27 | if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) { |
28 | Self::IllegalSequence { |
29 | source: err, |
30 | offset: bytes_read, |
31 | } |
32 | } else { |
33 | err.into() |
34 | } |
35 | } |
36 | } |
37 | |
38 | #[doc (alias = "g_convert" )] |
39 | pub fn convert( |
40 | str_: &[u8], |
41 | to_codeset: impl IntoGStr, |
42 | from_codeset: impl IntoGStr, |
43 | ) -> Result<(Slice<u8>, usize), CvtError> { |
44 | assert!(str_.len() <= isize::MAX as usize); |
45 | let mut bytes_read = 0; |
46 | let mut bytes_written = 0; |
47 | let mut error = ptr::null_mut(); |
48 | let result = to_codeset.run_with_gstr(|to_codeset| { |
49 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
50 | ffi::g_convert( |
51 | str_.as_ptr(), |
52 | str_.len() as isize, |
53 | to_codeset.to_glib_none().0, |
54 | from_codeset.to_glib_none().0, |
55 | &mut bytes_read, |
56 | &mut bytes_written, |
57 | &mut error, |
58 | ) |
59 | }) |
60 | }); |
61 | if result.is_null() { |
62 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
63 | } else { |
64 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
65 | Ok((slice, bytes_read)) |
66 | } |
67 | } |
68 | |
69 | #[doc (alias = "g_convert_with_fallback" )] |
70 | pub fn convert_with_fallback( |
71 | str_: &[u8], |
72 | to_codeset: impl IntoGStr, |
73 | from_codeset: impl IntoGStr, |
74 | fallback: Option<impl IntoGStr>, |
75 | ) -> Result<(Slice<u8>, usize), CvtError> { |
76 | assert!(str_.len() <= isize::MAX as usize); |
77 | let mut bytes_read = 0; |
78 | let mut bytes_written = 0; |
79 | let mut error = ptr::null_mut(); |
80 | let result = to_codeset.run_with_gstr(|to_codeset| { |
81 | from_codeset.run_with_gstr(|from_codeset| { |
82 | fallback.run_with_gstr(|fallback| unsafe { |
83 | ffi::g_convert_with_fallback( |
84 | str_.as_ptr(), |
85 | str_.len() as isize, |
86 | to_codeset.to_glib_none().0, |
87 | from_codeset.to_glib_none().0, |
88 | fallback.to_glib_none().0, |
89 | &mut bytes_read, |
90 | &mut bytes_written, |
91 | &mut error, |
92 | ) |
93 | }) |
94 | }) |
95 | }); |
96 | if result.is_null() { |
97 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
98 | } else { |
99 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
100 | Ok((slice, bytes_read)) |
101 | } |
102 | } |
103 | |
104 | // rustdoc-stripper-ignore-next |
105 | /// A wrapper for [`std::io::Error`] that can hold an offset into an input string. |
106 | #[derive (thiserror::Error, Debug)] |
107 | pub enum IConvError { |
108 | #[error(transparent)] |
109 | Error(#[from] io::Error), |
110 | #[error("{source} at offset {offset}" )] |
111 | WithOffset { |
112 | #[source] |
113 | source: io::Error, |
114 | offset: usize, |
115 | }, |
116 | } |
117 | |
118 | #[derive (Debug)] |
119 | #[repr (transparent)] |
120 | #[doc (alias = "GIConv" )] |
121 | pub struct IConv(ffi::GIConv); |
122 | |
123 | unsafe impl Send for IConv {} |
124 | |
125 | impl IConv { |
126 | #[doc (alias = "g_iconv_open" )] |
127 | #[allow (clippy::unnecessary_lazy_evaluations)] |
128 | pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> { |
129 | let iconv = to_codeset.run_with_gstr(|to_codeset| { |
130 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
131 | ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0) |
132 | }) |
133 | }); |
134 | (iconv as isize != -1).then(|| Self(iconv)) |
135 | } |
136 | #[doc (alias = "g_convert_with_iconv" )] |
137 | pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> { |
138 | assert!(str_.len() <= isize::MAX as usize); |
139 | let mut bytes_read = 0; |
140 | let mut bytes_written = 0; |
141 | let mut error = ptr::null_mut(); |
142 | let result = unsafe { |
143 | ffi::g_convert_with_iconv( |
144 | str_.as_ptr(), |
145 | str_.len() as isize, |
146 | self.0, |
147 | &mut bytes_read, |
148 | &mut bytes_written, |
149 | &mut error, |
150 | ) |
151 | }; |
152 | if result.is_null() { |
153 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
154 | } else { |
155 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
156 | Ok((slice, bytes_read)) |
157 | } |
158 | } |
159 | #[doc (alias = "g_iconv" )] |
160 | pub fn iconv( |
161 | &mut self, |
162 | inbuf: Option<&[u8]>, |
163 | outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>, |
164 | ) -> Result<(usize, usize, usize), IConvError> { |
165 | let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
166 | let mut inbytes_left = input_len; |
167 | let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
168 | let mut inbuf = inbuf |
169 | .map(|b| mut_override(b.as_ptr()) as *mut c_char) |
170 | .unwrap_or_else(ptr::null_mut); |
171 | let mut outbuf = outbuf |
172 | .map(|b| b.as_mut_ptr() as *mut c_char) |
173 | .unwrap_or_else(ptr::null_mut); |
174 | let conversions = unsafe { |
175 | ffi::g_iconv( |
176 | self.0, |
177 | &mut inbuf, |
178 | &mut inbytes_left, |
179 | &mut outbuf, |
180 | &mut outbytes_left, |
181 | ) |
182 | }; |
183 | if conversions as isize == -1 { |
184 | let err = io::Error::last_os_error(); |
185 | let code = err.raw_os_error().unwrap(); |
186 | if code == libc::EILSEQ || code == libc::EINVAL { |
187 | Err(IConvError::WithOffset { |
188 | source: err, |
189 | offset: input_len - inbytes_left, |
190 | }) |
191 | } else { |
192 | Err(err.into()) |
193 | } |
194 | } else { |
195 | Ok((conversions, inbytes_left, outbytes_left)) |
196 | } |
197 | } |
198 | } |
199 | |
200 | impl Drop for IConv { |
201 | #[inline ] |
202 | fn drop(&mut self) { |
203 | unsafe { |
204 | ffi::g_iconv_close(self.0); |
205 | } |
206 | } |
207 | } |
208 | |
209 | #[doc (alias = "g_get_filename_charsets" )] |
210 | #[doc (alias = "get_filename_charsets" )] |
211 | pub fn filename_charsets() -> (bool, Vec<GString>) { |
212 | let mut filename_charsets: *mut *const i8 = ptr::null_mut(); |
213 | unsafe { |
214 | let is_utf8: i32 = ffi::g_get_filename_charsets(&mut filename_charsets); |
215 | ( |
216 | from_glib(val:is_utf8), |
217 | FromGlibPtrContainer::from_glib_none(ptr:filename_charsets), |
218 | ) |
219 | } |
220 | } |
221 | |
222 | #[doc (alias = "g_filename_from_utf8" )] |
223 | pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> { |
224 | let mut bytes_read = 0; |
225 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
226 | let mut error = ptr::null_mut(); |
227 | let ret = utf8string.run_with_gstr(|utf8string| { |
228 | assert!(utf8string.len() <= isize::MAX as usize); |
229 | let len = utf8string.len() as isize; |
230 | unsafe { |
231 | ffi::g_filename_from_utf8( |
232 | utf8string.to_glib_none().0, |
233 | len, |
234 | &mut bytes_read, |
235 | bytes_written.as_mut_ptr(), |
236 | &mut error, |
237 | ) |
238 | } |
239 | }); |
240 | if error.is_null() { |
241 | Ok(unsafe { |
242 | ( |
243 | PathBuf::from_glib_full_num(ret, bytes_written.assume_init()), |
244 | bytes_read, |
245 | ) |
246 | }) |
247 | } else { |
248 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
249 | } |
250 | } |
251 | |
252 | #[doc (alias = "g_filename_to_utf8" )] |
253 | pub fn filename_to_utf8( |
254 | opsysstring: impl AsRef<std::path::Path>, |
255 | ) -> Result<(crate::GString, usize), CvtError> { |
256 | let path = opsysstring.as_ref().to_glib_none(); |
257 | let mut bytes_read = 0; |
258 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
259 | let mut error = ptr::null_mut(); |
260 | let ret = unsafe { |
261 | ffi::g_filename_to_utf8( |
262 | path.0, |
263 | path.1.as_bytes().len() as isize, |
264 | &mut bytes_read, |
265 | bytes_written.as_mut_ptr(), |
266 | &mut error, |
267 | ) |
268 | }; |
269 | if error.is_null() { |
270 | Ok(unsafe { |
271 | ( |
272 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
273 | bytes_read, |
274 | ) |
275 | }) |
276 | } else { |
277 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
278 | } |
279 | } |
280 | |
281 | #[doc (alias = "g_locale_from_utf8" )] |
282 | pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> { |
283 | let mut bytes_read = 0; |
284 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
285 | let mut error = ptr::null_mut(); |
286 | let ret = utf8string.run_with_gstr(|utf8string| { |
287 | assert!(utf8string.len() <= isize::MAX as usize); |
288 | unsafe { |
289 | ffi::g_locale_from_utf8( |
290 | utf8string.as_ptr(), |
291 | utf8string.len() as isize, |
292 | &mut bytes_read, |
293 | bytes_written.as_mut_ptr(), |
294 | &mut error, |
295 | ) |
296 | } |
297 | }); |
298 | if error.is_null() { |
299 | Ok(unsafe { |
300 | ( |
301 | Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1), |
302 | bytes_read, |
303 | ) |
304 | }) |
305 | } else { |
306 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
307 | } |
308 | } |
309 | |
310 | #[doc (alias = "g_locale_to_utf8" )] |
311 | pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> { |
312 | let len = opsysstring.len() as isize; |
313 | let mut bytes_read = 0; |
314 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
315 | let mut error = ptr::null_mut(); |
316 | let ret = unsafe { |
317 | ffi::g_locale_to_utf8( |
318 | opsysstring.to_glib_none().0, |
319 | len, |
320 | &mut bytes_read, |
321 | bytes_written.as_mut_ptr(), |
322 | &mut error, |
323 | ) |
324 | }; |
325 | if error.is_null() { |
326 | Ok(unsafe { |
327 | ( |
328 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
329 | bytes_read, |
330 | ) |
331 | }) |
332 | } else { |
333 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
334 | } |
335 | } |
336 | |
337 | #[doc (alias = "g_utf8_to_ucs4" )] |
338 | #[doc (alias = "g_utf8_to_ucs4_fast" )] |
339 | #[doc (alias = "utf8_to_ucs4" )] |
340 | pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> { |
341 | unsafe { |
342 | let mut items_written: i64 = 0; |
343 | |
344 | let str_as_utf32: *mut u32 = ffi::g_utf8_to_ucs4_fast( |
345 | str:str.as_ref().as_ptr().cast::<c_char>(), |
346 | str.as_ref().len() as _, |
347 | &mut items_written, |
348 | ); |
349 | |
350 | // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us |
351 | // invalid UTF-32 codepoints |
352 | Slice::from_glib_full_num(ptr:str_as_utf32, len:items_written as usize) |
353 | } |
354 | } |
355 | |
356 | #[doc (alias = "g_ucs4_to_utf8" )] |
357 | #[doc (alias = "ucs4_to_utf8" )] |
358 | pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString { |
359 | let mut items_read: i64 = 0; |
360 | let mut items_written: i64 = 0; |
361 | let mut error: *mut GError = ptr::null_mut(); |
362 | |
363 | unsafe { |
364 | let str_as_utf8: *mut i8 = ffi::g_ucs4_to_utf8( |
365 | str:str.as_ref().as_ptr().cast::<u32>(), |
366 | str.as_ref().len() as _, |
367 | &mut items_read, |
368 | &mut items_written, |
369 | &mut error, |
370 | ); |
371 | |
372 | debug_assert!( |
373 | error.is_null(), |
374 | "Rust `char` should always be convertible to UTF-8" |
375 | ); |
376 | |
377 | GString::from_glib_full_num(ptr:str_as_utf8, num:items_written as usize) |
378 | } |
379 | } |
380 | |
381 | #[doc (alias = "g_utf8_casefold" )] |
382 | #[doc (alias = "utf8_casefold" )] |
383 | pub fn casefold(str: impl AsRef<str>) -> GString { |
384 | unsafe { |
385 | let str: *mut i8 = ffi::g_utf8_casefold(str:str.as_ref().as_ptr().cast(), str.as_ref().len() as isize); |
386 | |
387 | from_glib_full(ptr:str) |
388 | } |
389 | } |
390 | |
391 | #[doc (alias = "g_utf8_normalize" )] |
392 | #[doc (alias = "utf8_normalize" )] |
393 | pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString { |
394 | unsafe { |
395 | let str: *mut i8 = ffi::g_utf8_normalize( |
396 | str:str.as_ref().as_ptr().cast(), |
397 | str.as_ref().len() as isize, |
398 | mode:mode.into_glib(), |
399 | ); |
400 | |
401 | from_glib_full(ptr:str) |
402 | } |
403 | } |
404 | |
#[cfg(test)]
mod tests {
    // Round-trips through `g_convert` / `g_convert_with_fallback`, covering valid input,
    // an invalid byte, and both the default and a custom fallback.
    #[test]
    fn convert_ascii() {
        assert!(super::convert(b"Hello", "utf-8", "ascii").is_ok());
        assert!(super::convert(b"He\xaallo", "utf-8", "ascii").is_err());
        assert_eq!(
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap()
                .0
                .as_slice(),
            b"H\\u00e9llo"
        );
        // The input must not contain a space after `H`, otherwise the result would be
        // `H _llo` and this assertion would fail.
        assert_eq!(
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_"))
                .unwrap()
                .0
                .as_slice(),
            b"H_llo"
        );
    }

    // Opening a converter succeeds for known charsets and fails for an unknown one.
    #[test]
    fn iconv() {
        let mut conv = super::IConv::new("utf-8", "ascii").unwrap();
        assert!(conv.convert(b"Hello").is_ok());
        assert!(conv.convert(b"He\xaallo").is_err());
        assert!(super::IConv::new("utf-8", "badcharset123456789").is_none());
    }

    // Smoke test: the call must simply not crash.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Conversions between `&str` and `[char]` in both directions.
    #[test]
    fn utf8_and_utf32() {
        let utf32 = ['A', 'b', '🤔'];
        let utf8 = super::utf32_to_utf8(utf32);
        assert_eq!(utf8, "Ab🤔");

        let utf8 = "🤔 ț";
        let utf32 = super::utf8_to_utf32(utf8);
        assert_eq!(utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}
449 | |