| 1 | use crate::conversion::IntoPyObject; |
| 2 | use crate::ffi_ptr_ext::FfiPtrExt; |
| 3 | use crate::instance::Bound; |
| 4 | use crate::types::any::PyAnyMethods; |
| 5 | use crate::types::PyString; |
| 6 | use crate::{ffi, FromPyObject, PyAny, PyObject, PyResult, Python}; |
| 7 | #[allow (deprecated)] |
| 8 | use crate::{IntoPy, ToPyObject}; |
| 9 | use std::borrow::Cow; |
| 10 | use std::convert::Infallible; |
| 11 | use std::ffi::{OsStr, OsString}; |
| 12 | |
| 13 | #[allow (deprecated)] |
| 14 | impl ToPyObject for OsStr { |
| 15 | #[inline ] |
| 16 | fn to_object(&self, py: Python<'_>) -> PyObject { |
| 17 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 18 | } |
| 19 | } |
| 20 | |
| 21 | impl<'py> IntoPyObject<'py> for &OsStr { |
| 22 | type Target = PyString; |
| 23 | type Output = Bound<'py, Self::Target>; |
| 24 | type Error = Infallible; |
| 25 | |
| 26 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 27 | // If the string is UTF-8, take the quick and easy shortcut |
| 28 | if let Some(valid_utf8_path) = self.to_str() { |
| 29 | return valid_utf8_path.into_pyobject(py); |
| 30 | } |
| 31 | |
| 32 | // All targets besides windows support the std::os::unix::ffi::OsStrExt API: |
| 33 | // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59 |
| 34 | #[cfg (not(windows))] |
| 35 | { |
| 36 | #[cfg (target_os = "wasi" )] |
| 37 | let bytes = std::os::wasi::ffi::OsStrExt::as_bytes(self); |
| 38 | #[cfg (not(target_os = "wasi" ))] |
| 39 | let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self); |
| 40 | |
| 41 | let ptr = bytes.as_ptr().cast(); |
| 42 | let len = bytes.len() as ffi::Py_ssize_t; |
| 43 | unsafe { |
| 44 | // DecodeFSDefault automatically chooses an appropriate decoding mechanism to |
| 45 | // parse os strings losslessly (i.e. surrogateescape most of the time) |
| 46 | Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len) |
| 47 | .assume_owned(py) |
| 48 | .downcast_into_unchecked::<PyString>()) |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | #[cfg (windows)] |
| 53 | { |
| 54 | let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect(); |
| 55 | |
| 56 | unsafe { |
| 57 | // This will not panic because the data from encode_wide is well-formed Windows |
| 58 | // string data |
| 59 | |
| 60 | Ok( |
| 61 | ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t) |
| 62 | .assume_owned(py) |
| 63 | .downcast_into_unchecked::<PyString>(), |
| 64 | ) |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | impl<'py> IntoPyObject<'py> for &&OsStr { |
| 71 | type Target = PyString; |
| 72 | type Output = Bound<'py, Self::Target>; |
| 73 | type Error = Infallible; |
| 74 | |
| 75 | #[inline ] |
| 76 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 77 | (*self).into_pyobject(py) |
| 78 | } |
| 79 | } |
| 80 | |
// There is no `FromPyObject` implementation for `&OsStr`: although it would be possible on Unix,
// it would be impossible to implement on Windows, so it is omitted entirely.
| 83 | |
| 84 | impl FromPyObject<'_> for OsString { |
| 85 | fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> { |
| 86 | let pystring = ob.downcast::<PyString>()?; |
| 87 | |
| 88 | #[cfg (not(windows))] |
| 89 | { |
| 90 | // Decode from Python's lossless bytes string representation back into raw bytes |
| 91 | let fs_encoded_bytes = unsafe { |
| 92 | crate::Py::<crate::types::PyBytes>::from_owned_ptr( |
| 93 | ob.py(), |
| 94 | ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()), |
| 95 | ) |
| 96 | }; |
| 97 | |
| 98 | // Create an OsStr view into the raw bytes from Python |
| 99 | #[cfg (target_os = "wasi" )] |
| 100 | let os_str: &OsStr = |
| 101 | std::os::wasi::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py())); |
| 102 | #[cfg (not(target_os = "wasi" ))] |
| 103 | let os_str: &OsStr = |
| 104 | std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py())); |
| 105 | |
| 106 | Ok(os_str.to_os_string()) |
| 107 | } |
| 108 | |
| 109 | #[cfg (windows)] |
| 110 | { |
| 111 | use crate::types::string::PyStringMethods; |
| 112 | |
| 113 | // Take the quick and easy shortcut if UTF-8 |
| 114 | if let Ok(utf8_string) = pystring.to_cow() { |
| 115 | return Ok(utf8_string.into_owned().into()); |
| 116 | } |
| 117 | |
| 118 | // Get an owned allocated wide char buffer from PyString, which we have to deallocate |
| 119 | // ourselves |
| 120 | let size = |
| 121 | unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) }; |
| 122 | crate::err::error_on_minusone(ob.py(), size)?; |
| 123 | |
| 124 | let mut buffer = vec![0; size as usize]; |
| 125 | let bytes_read = |
| 126 | unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) }; |
| 127 | assert_eq!(bytes_read, size); |
| 128 | |
| 129 | // Copy wide char buffer into OsString |
| 130 | let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer); |
| 131 | |
| 132 | Ok(os_string) |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | #[allow (deprecated)] |
| 138 | impl IntoPy<PyObject> for &'_ OsStr { |
| 139 | #[inline ] |
| 140 | fn into_py(self, py: Python<'_>) -> PyObject { |
| 141 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | #[allow (deprecated)] |
| 146 | impl ToPyObject for Cow<'_, OsStr> { |
| 147 | #[inline ] |
| 148 | fn to_object(&self, py: Python<'_>) -> PyObject { |
| 149 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | #[allow (deprecated)] |
| 154 | impl IntoPy<PyObject> for Cow<'_, OsStr> { |
| 155 | #[inline ] |
| 156 | fn into_py(self, py: Python<'_>) -> PyObject { |
| 157 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | impl<'py> IntoPyObject<'py> for Cow<'_, OsStr> { |
| 162 | type Target = PyString; |
| 163 | type Output = Bound<'py, Self::Target>; |
| 164 | type Error = Infallible; |
| 165 | |
| 166 | #[inline ] |
| 167 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 168 | (*self).into_pyobject(py) |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | impl<'py> IntoPyObject<'py> for &Cow<'_, OsStr> { |
| 173 | type Target = PyString; |
| 174 | type Output = Bound<'py, Self::Target>; |
| 175 | type Error = Infallible; |
| 176 | |
| 177 | #[inline ] |
| 178 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 179 | (&**self).into_pyobject(py) |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | #[allow (deprecated)] |
| 184 | impl ToPyObject for OsString { |
| 185 | #[inline ] |
| 186 | fn to_object(&self, py: Python<'_>) -> PyObject { |
| 187 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | #[allow (deprecated)] |
| 192 | impl IntoPy<PyObject> for OsString { |
| 193 | #[inline ] |
| 194 | fn into_py(self, py: Python<'_>) -> PyObject { |
| 195 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | impl<'py> IntoPyObject<'py> for OsString { |
| 200 | type Target = PyString; |
| 201 | type Output = Bound<'py, Self::Target>; |
| 202 | type Error = Infallible; |
| 203 | |
| 204 | #[inline ] |
| 205 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 206 | self.as_os_str().into_pyobject(py) |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | #[allow (deprecated)] |
| 211 | impl IntoPy<PyObject> for &OsString { |
| 212 | #[inline ] |
| 213 | fn into_py(self, py: Python<'_>) -> PyObject { |
| 214 | self.into_pyobject(py).unwrap().into_any().unbind() |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | impl<'py> IntoPyObject<'py> for &OsString { |
| 219 | type Target = PyString; |
| 220 | type Output = Bound<'py, Self::Target>; |
| 221 | type Error = Infallible; |
| 222 | |
| 223 | #[inline ] |
| 224 | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { |
| 225 | self.as_os_str().into_pyobject(py) |
| 226 | } |
| 227 | } |
| 228 | |
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
    use crate::{BoundObject, IntoPyObject, Python};
    use std::fmt::Debug;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// A byte sequence that is not valid UTF-8 must survive a trip through Python unchanged.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        Python::with_gil(|py| {
            #[cfg(not(target_os = "wasi"))]
            use std::os::unix::ffi::OsStrExt;
            #[cfg(target_os = "wasi")]
            use std::os::wasi::ffi::OsStrExt;

            // this is not valid UTF-8
            let raw_bytes = &[250, 251, 252, 253, 254, 255, 0, 255];
            let original = OsStr::from_bytes(raw_bytes);

            // do a roundtrip into Pythonland and back and compare
            let as_python = original.into_pyobject(py).unwrap();
            let back_in_rust: OsString = as_python.extract().unwrap();
            assert_eq!(original, back_in_rust);
        });
    }

    /// Every `IntoPyObject` source type must produce a `str` that extracts back to an equal
    /// `OsString`.
    #[test]
    fn test_intopyobject_roundtrip() {
        /// Converts `obj` to Python, checks the lossy text matches, then extracts it back
        /// and compares against the input.
        fn test_roundtrip<'py, T>(py: Python<'py>, obj: T)
        where
            T: IntoPyObject<'py> + AsRef<OsStr> + Debug + Clone,
            T::Error: Debug,
        {
            let converted = obj.clone().into_pyobject(py).unwrap().into_any();
            let as_str = converted.as_borrowed().downcast::<PyString>().unwrap();
            assert_eq!(as_str.to_string_lossy(), obj.as_ref().to_string_lossy());

            let extracted: OsString = as_str.extract().unwrap();
            assert_eq!(obj.as_ref(), extracted.as_os_str());
        }

        Python::with_gil(|py| {
            let sample = OsStr::new("Hello \0\n🐍");
            test_roundtrip::<&OsStr>(py, sample);
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(sample));
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(sample.to_os_string()));
            test_roundtrip::<OsString>(py, sample.to_os_string());
        });
    }
}
| 281 | |