| 1 | // Take a look at the license at the top of the repository in the LICENSE file. |
| 2 | |
| 3 | use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr}; |
| 4 | |
| 5 | use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice}; |
| 6 | |
| 7 | // rustdoc-stripper-ignore-next |
| 8 | /// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input |
| 9 | /// string. |
| 10 | #[derive (Debug)] |
| 11 | pub enum CvtError { |
| 12 | Convert(Error), |
| 13 | IllegalSequence { source: Error, offset: usize }, |
| 14 | } |
| 15 | |
| 16 | impl std::error::Error for CvtError { |
| 17 | fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> { |
| 18 | match self { |
| 19 | CvtError::Convert(err: &Error) => std::error::Error::source(self:err), |
| 20 | CvtError::IllegalSequence { source: &Error, .. } => Some(source), |
| 21 | } |
| 22 | } |
| 23 | } |
| 24 | |
| 25 | impl fmt::Display for CvtError { |
| 26 | fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result { |
| 27 | match self { |
| 28 | CvtError::Convert(err: &Error) => fmt::Display::fmt(self:err, f:fmt), |
| 29 | CvtError::IllegalSequence { source: &Error, offset: &usize } => { |
| 30 | write!(fmt, " {source} at offset {offset}" ) |
| 31 | } |
| 32 | } |
| 33 | } |
| 34 | } |
| 35 | |
| 36 | impl std::convert::From<Error> for CvtError { |
| 37 | fn from(err: Error) -> Self { |
| 38 | CvtError::Convert(err) |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | impl CvtError { |
| 43 | #[inline ] |
| 44 | fn new(err: Error, bytes_read: usize) -> Self { |
| 45 | if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) { |
| 46 | Self::IllegalSequence { |
| 47 | source: err, |
| 48 | offset: bytes_read, |
| 49 | } |
| 50 | } else { |
| 51 | err.into() |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | #[doc (alias = "g_convert" )] |
| 57 | pub fn convert( |
| 58 | str_: &[u8], |
| 59 | to_codeset: impl IntoGStr, |
| 60 | from_codeset: impl IntoGStr, |
| 61 | ) -> Result<(Slice<u8>, usize), CvtError> { |
| 62 | assert!(str_.len() <= isize::MAX as usize); |
| 63 | let mut bytes_read = 0; |
| 64 | let mut bytes_written = 0; |
| 65 | let mut error = ptr::null_mut(); |
| 66 | let result = to_codeset.run_with_gstr(|to_codeset| { |
| 67 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
| 68 | ffi::g_convert( |
| 69 | str_.as_ptr(), |
| 70 | str_.len() as isize, |
| 71 | to_codeset.to_glib_none().0, |
| 72 | from_codeset.to_glib_none().0, |
| 73 | &mut bytes_read, |
| 74 | &mut bytes_written, |
| 75 | &mut error, |
| 76 | ) |
| 77 | }) |
| 78 | }); |
| 79 | if result.is_null() { |
| 80 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
| 81 | } else { |
| 82 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
| 83 | Ok((slice, bytes_read)) |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | #[doc (alias = "g_convert_with_fallback" )] |
| 88 | pub fn convert_with_fallback( |
| 89 | str_: &[u8], |
| 90 | to_codeset: impl IntoGStr, |
| 91 | from_codeset: impl IntoGStr, |
| 92 | fallback: Option<impl IntoGStr>, |
| 93 | ) -> Result<(Slice<u8>, usize), CvtError> { |
| 94 | assert!(str_.len() <= isize::MAX as usize); |
| 95 | let mut bytes_read = 0; |
| 96 | let mut bytes_written = 0; |
| 97 | let mut error = ptr::null_mut(); |
| 98 | let result = to_codeset.run_with_gstr(|to_codeset| { |
| 99 | from_codeset.run_with_gstr(|from_codeset| { |
| 100 | fallback.run_with_gstr(|fallback| unsafe { |
| 101 | ffi::g_convert_with_fallback( |
| 102 | str_.as_ptr(), |
| 103 | str_.len() as isize, |
| 104 | to_codeset.to_glib_none().0, |
| 105 | from_codeset.to_glib_none().0, |
| 106 | fallback.to_glib_none().0, |
| 107 | &mut bytes_read, |
| 108 | &mut bytes_written, |
| 109 | &mut error, |
| 110 | ) |
| 111 | }) |
| 112 | }) |
| 113 | }); |
| 114 | if result.is_null() { |
| 115 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
| 116 | } else { |
| 117 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
| 118 | Ok((slice, bytes_read)) |
| 119 | } |
| 120 | } |
| 121 | |
// rustdoc-stripper-ignore-next
/// A wrapper for [`std::io::Error`] that can hold an offset into an input string.
#[derive(Debug)]
pub enum IConvError {
    /// An I/O error that carries no input offset.
    Error(io::Error),
    /// An I/O error together with an offset into the input.
    WithOffset {
        /// The underlying error.
        source: io::Error,
        /// Offset into the input associated with the error.
        offset: usize,
    },
}

impl std::error::Error for IConvError {
    /// Returns the lower-level cause of this error.
    ///
    /// `Error` forwards to the wrapped error's own `source()`, whereas `WithOffset` reports
    /// the wrapped error itself as the cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            IConvError::Error(err) => std::error::Error::source(err),
            IConvError::WithOffset { source, .. } => Some(source),
        }
    }
}

impl fmt::Display for IConvError {
    /// Formats the error for display.
    ///
    /// `Error` prints exactly what the wrapped error prints; `WithOffset` prints the wrapped
    /// error followed by `" at offset <offset>"`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            IConvError::Error(err) => fmt::Display::fmt(err, f),
            // No leading whitespace, so both variants start with the wrapped error's text.
            IConvError::WithOffset { source, offset } => {
                write!(f, "{source} at offset {offset}")
            }
        }
    }
}

impl From<io::Error> for IConvError {
    /// Wraps a plain [`io::Error`] as the offset-less [`IConvError::Error`] variant.
    fn from(err: io::Error) -> Self {
        IConvError::Error(err)
    }
}
| 153 | |
| 154 | #[derive (Debug)] |
| 155 | #[repr (transparent)] |
| 156 | #[doc (alias = "GIConv" )] |
| 157 | pub struct IConv(ffi::GIConv); |
| 158 | |
| 159 | unsafe impl Send for IConv {} |
| 160 | |
| 161 | impl IConv { |
| 162 | #[doc (alias = "g_iconv_open" )] |
| 163 | #[allow (clippy::unnecessary_lazy_evaluations)] |
| 164 | pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> { |
| 165 | let iconv = to_codeset.run_with_gstr(|to_codeset| { |
| 166 | from_codeset.run_with_gstr(|from_codeset| unsafe { |
| 167 | ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0) |
| 168 | }) |
| 169 | }); |
| 170 | (iconv as isize != -1).then(|| Self(iconv)) |
| 171 | } |
| 172 | #[doc (alias = "g_convert_with_iconv" )] |
| 173 | pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> { |
| 174 | assert!(str_.len() <= isize::MAX as usize); |
| 175 | let mut bytes_read = 0; |
| 176 | let mut bytes_written = 0; |
| 177 | let mut error = ptr::null_mut(); |
| 178 | let result = unsafe { |
| 179 | ffi::g_convert_with_iconv( |
| 180 | str_.as_ptr(), |
| 181 | str_.len() as isize, |
| 182 | self.0, |
| 183 | &mut bytes_read, |
| 184 | &mut bytes_written, |
| 185 | &mut error, |
| 186 | ) |
| 187 | }; |
| 188 | if result.is_null() { |
| 189 | Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read)) |
| 190 | } else { |
| 191 | let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) }; |
| 192 | Ok((slice, bytes_read)) |
| 193 | } |
| 194 | } |
| 195 | #[doc (alias = "g_iconv" )] |
| 196 | pub fn iconv( |
| 197 | &mut self, |
| 198 | inbuf: Option<&[u8]>, |
| 199 | outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>, |
| 200 | ) -> Result<(usize, usize, usize), IConvError> { |
| 201 | let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
| 202 | let mut inbytes_left = input_len; |
| 203 | let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default(); |
| 204 | let mut inbuf = inbuf |
| 205 | .map(|b| mut_override(b.as_ptr()) as *mut c_char) |
| 206 | .unwrap_or_else(ptr::null_mut); |
| 207 | let mut outbuf = outbuf |
| 208 | .map(|b| b.as_mut_ptr() as *mut c_char) |
| 209 | .unwrap_or_else(ptr::null_mut); |
| 210 | let conversions = unsafe { |
| 211 | ffi::g_iconv( |
| 212 | self.0, |
| 213 | &mut inbuf, |
| 214 | &mut inbytes_left, |
| 215 | &mut outbuf, |
| 216 | &mut outbytes_left, |
| 217 | ) |
| 218 | }; |
| 219 | if conversions as isize == -1 { |
| 220 | let err = io::Error::last_os_error(); |
| 221 | let code = err.raw_os_error().unwrap(); |
| 222 | if code == libc::EILSEQ || code == libc::EINVAL { |
| 223 | Err(IConvError::WithOffset { |
| 224 | source: err, |
| 225 | offset: input_len - inbytes_left, |
| 226 | }) |
| 227 | } else { |
| 228 | Err(err.into()) |
| 229 | } |
| 230 | } else { |
| 231 | Ok((conversions, inbytes_left, outbytes_left)) |
| 232 | } |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | impl Drop for IConv { |
| 237 | #[inline ] |
| 238 | fn drop(&mut self) { |
| 239 | unsafe { |
| 240 | ffi::g_iconv_close(self.0); |
| 241 | } |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | #[doc (alias = "g_get_filename_charsets" )] |
| 246 | #[doc (alias = "get_filename_charsets" )] |
| 247 | pub fn filename_charsets() -> (bool, Vec<GString>) { |
| 248 | let mut filename_charsets: *mut *const {unknown} = ptr::null_mut(); |
| 249 | unsafe { |
| 250 | let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets); |
| 251 | ( |
| 252 | from_glib(val:is_utf8), |
| 253 | FromGlibPtrContainer::from_glib_none(ptr:filename_charsets), |
| 254 | ) |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | #[doc (alias = "g_filename_from_utf8" )] |
| 259 | pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> { |
| 260 | let mut bytes_read = 0; |
| 261 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
| 262 | let mut error = ptr::null_mut(); |
| 263 | let ret = utf8string.run_with_gstr(|utf8string| { |
| 264 | assert!(utf8string.len() <= isize::MAX as usize); |
| 265 | let len = utf8string.len() as isize; |
| 266 | unsafe { |
| 267 | ffi::g_filename_from_utf8( |
| 268 | utf8string.to_glib_none().0, |
| 269 | len, |
| 270 | &mut bytes_read, |
| 271 | bytes_written.as_mut_ptr(), |
| 272 | &mut error, |
| 273 | ) |
| 274 | } |
| 275 | }); |
| 276 | if error.is_null() { |
| 277 | Ok(unsafe { |
| 278 | ( |
| 279 | PathBuf::from_glib_full_num(ret, bytes_written.assume_init()), |
| 280 | bytes_read, |
| 281 | ) |
| 282 | }) |
| 283 | } else { |
| 284 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | #[doc (alias = "g_filename_to_utf8" )] |
| 289 | pub fn filename_to_utf8( |
| 290 | opsysstring: impl AsRef<std::path::Path>, |
| 291 | ) -> Result<(crate::GString, usize), CvtError> { |
| 292 | let path = opsysstring.as_ref().to_glib_none(); |
| 293 | let mut bytes_read = 0; |
| 294 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
| 295 | let mut error = ptr::null_mut(); |
| 296 | let ret = unsafe { |
| 297 | ffi::g_filename_to_utf8( |
| 298 | path.0, |
| 299 | path.1.as_bytes().len() as isize, |
| 300 | &mut bytes_read, |
| 301 | bytes_written.as_mut_ptr(), |
| 302 | &mut error, |
| 303 | ) |
| 304 | }; |
| 305 | if error.is_null() { |
| 306 | Ok(unsafe { |
| 307 | ( |
| 308 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
| 309 | bytes_read, |
| 310 | ) |
| 311 | }) |
| 312 | } else { |
| 313 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | #[doc (alias = "g_locale_from_utf8" )] |
| 318 | pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> { |
| 319 | let mut bytes_read = 0; |
| 320 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
| 321 | let mut error = ptr::null_mut(); |
| 322 | let ret = utf8string.run_with_gstr(|utf8string| { |
| 323 | assert!(utf8string.len() <= isize::MAX as usize); |
| 324 | unsafe { |
| 325 | ffi::g_locale_from_utf8( |
| 326 | utf8string.as_ptr(), |
| 327 | utf8string.len() as isize, |
| 328 | &mut bytes_read, |
| 329 | bytes_written.as_mut_ptr(), |
| 330 | &mut error, |
| 331 | ) |
| 332 | } |
| 333 | }); |
| 334 | if error.is_null() { |
| 335 | Ok(unsafe { |
| 336 | ( |
| 337 | Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1), |
| 338 | bytes_read, |
| 339 | ) |
| 340 | }) |
| 341 | } else { |
| 342 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | #[doc (alias = "g_locale_to_utf8" )] |
| 347 | pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> { |
| 348 | let len = opsysstring.len() as isize; |
| 349 | let mut bytes_read = 0; |
| 350 | let mut bytes_written = std::mem::MaybeUninit::uninit(); |
| 351 | let mut error = ptr::null_mut(); |
| 352 | let ret = unsafe { |
| 353 | ffi::g_locale_to_utf8( |
| 354 | opsysstring.to_glib_none().0, |
| 355 | len, |
| 356 | &mut bytes_read, |
| 357 | bytes_written.as_mut_ptr(), |
| 358 | &mut error, |
| 359 | ) |
| 360 | }; |
| 361 | if error.is_null() { |
| 362 | Ok(unsafe { |
| 363 | ( |
| 364 | GString::from_glib_full_num(ret, bytes_written.assume_init()), |
| 365 | bytes_read, |
| 366 | ) |
| 367 | }) |
| 368 | } else { |
| 369 | Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) }) |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | #[doc (alias = "g_utf8_to_ucs4" )] |
| 374 | #[doc (alias = "g_utf8_to_ucs4_fast" )] |
| 375 | #[doc (alias = "utf8_to_ucs4" )] |
| 376 | pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> { |
| 377 | unsafe { |
| 378 | let mut items_written: i32 = 0; |
| 379 | |
| 380 | let str_as_utf32: *mut u32 = ffi::g_utf8_to_ucs4_fast( |
| 381 | str.as_ref().as_ptr().cast::<c_char>(), |
| 382 | str.as_ref().len() as _, |
| 383 | &mut items_written, |
| 384 | ); |
| 385 | |
| 386 | // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us |
| 387 | // invalid UTF-32 codepoints |
| 388 | Slice::from_glib_full_num(ptr:str_as_utf32, len:items_written as usize) |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | #[doc (alias = "g_ucs4_to_utf8" )] |
| 393 | #[doc (alias = "ucs4_to_utf8" )] |
| 394 | pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString { |
| 395 | let mut items_read: i32 = 0; |
| 396 | let mut items_written: i32 = 0; |
| 397 | let mut error: *mut GError = ptr::null_mut(); |
| 398 | |
| 399 | unsafe { |
| 400 | let str_as_utf8: *mut {unknown} = ffi::g_ucs4_to_utf8( |
| 401 | str.as_ref().as_ptr().cast::<u32>(), |
| 402 | str.as_ref().len() as _, |
| 403 | &mut items_read, |
| 404 | &mut items_written, |
| 405 | &mut error, |
| 406 | ); |
| 407 | |
| 408 | debug_assert!( |
| 409 | error.is_null(), |
| 410 | "Rust `char` should always be convertible to UTF-8" |
| 411 | ); |
| 412 | |
| 413 | GString::from_glib_full_num(ptr:str_as_utf8, num:items_written as usize) |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | #[doc (alias = "g_utf8_casefold" )] |
| 418 | #[doc (alias = "utf8_casefold" )] |
| 419 | pub fn casefold(str: impl AsRef<str>) -> GString { |
| 420 | unsafe { |
| 421 | let str: *mut {unknown} = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize); |
| 422 | |
| 423 | from_glib_full(ptr:str) |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | #[doc (alias = "g_utf8_normalize" )] |
| 428 | #[doc (alias = "utf8_normalize" )] |
| 429 | pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString { |
| 430 | unsafe { |
| 431 | let str: *mut {unknown} = ffi::g_utf8_normalize( |
| 432 | str.as_ref().as_ptr().cast(), |
| 433 | str.as_ref().len() as isize, |
| 434 | mode.into_glib(), |
| 435 | ); |
| 436 | |
| 437 | from_glib_full(ptr:str) |
| 438 | } |
| 439 | } |
| 440 | |
#[cfg(test)]
mod tests {
    /// Exercises `convert` and `convert_with_fallback` between ASCII and UTF-8.
    #[test]
    fn convert_ascii() {
        assert!(super::convert(b"Hello", "utf-8", "ascii").is_ok());
        // 0xaa is not a valid ASCII byte.
        assert!(super::convert(b"He \xaallo", "utf-8", "ascii").is_err());
        // Without a fallback string the unrepresentable character is expected as an escape.
        assert_eq!(
            super::convert_with_fallback(b"H \xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap()
                .0
                .as_slice(),
            b"H \\u00e9llo"
        );
        // With a fallback string only the unrepresentable character is replaced; the space
        // that precedes it in the input has to show up in the output as well.
        assert_eq!(
            super::convert_with_fallback(b"H \xc3\xa9llo", "ascii", "utf-8", Some("_"))
                .unwrap()
                .0
                .as_slice(),
            b"H _llo"
        );
    }

    /// Exercises `IConv` construction (valid and unknown charset) and conversion.
    #[test]
    fn iconv() {
        let mut conv = super::IConv::new("utf-8", "ascii").unwrap();
        assert!(conv.convert(b"Hello").is_ok());
        assert!(conv.convert(b"He \xaallo").is_err());
        assert!(super::IConv::new("utf-8", "badcharset123456789").is_none());
    }

    /// Smoke test: the call must simply not crash.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    /// Round-trips text between UTF-8 and UTF-32 in both directions.
    #[test]
    fn utf8_and_utf32() {
        let utf32 = ['A', 'b', '🤔'];
        let utf8 = super::utf32_to_utf8(utf32);
        assert_eq!(utf8, "Ab🤔");

        let utf8 = "🤔 ț";
        let utf32 = super::utf8_to_utf32(utf8);
        assert_eq!(utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}
| 485 | |