1 | use crate::types::PyString; |
2 | use crate::{ffi, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject}; |
3 | use std::borrow::Cow; |
4 | use std::ffi::{OsStr, OsString}; |
5 | #[cfg (not(windows))] |
6 | use std::os::raw::c_char; |
7 | |
8 | impl ToPyObject for OsStr { |
9 | fn to_object(&self, py: Python<'_>) -> PyObject { |
10 | // If the string is UTF-8, take the quick and easy shortcut |
11 | if let Some(valid_utf8_path) = self.to_str() { |
12 | return valid_utf8_path.to_object(py); |
13 | } |
14 | |
15 | // All targets besides windows support the std::os::unix::ffi::OsStrExt API: |
16 | // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59 |
17 | #[cfg (not(windows))] |
18 | { |
19 | #[cfg (target_os = "wasi" )] |
20 | let bytes = std::os::wasi::ffi::OsStrExt::as_bytes(self); |
21 | #[cfg (not(target_os = "wasi" ))] |
22 | let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self); |
23 | |
24 | let ptr = bytes.as_ptr() as *const c_char; |
25 | let len = bytes.len() as ffi::Py_ssize_t; |
26 | unsafe { |
27 | // DecodeFSDefault automatically chooses an appropriate decoding mechanism to |
28 | // parse os strings losslessly (i.e. surrogateescape most of the time) |
29 | let pystring = ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len); |
30 | PyObject::from_owned_ptr(py, pystring) |
31 | } |
32 | } |
33 | |
34 | #[cfg (windows)] |
35 | { |
36 | let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect(); |
37 | |
38 | unsafe { |
39 | // This will not panic because the data from encode_wide is well-formed Windows |
40 | // string data |
41 | PyObject::from_owned_ptr( |
42 | py, |
43 | ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t), |
44 | ) |
45 | } |
46 | } |
47 | } |
48 | } |
49 | |
// There is no FromPyObject implementation for &OsStr: although it would be possible on Unix,
// it cannot be implemented on Windows, so it is omitted on every platform.
52 | |
53 | impl FromPyObject<'_> for OsString { |
54 | fn extract(ob: &PyAny) -> PyResult<Self> { |
55 | let pystring: &PyString = ob.downcast()?; |
56 | |
57 | #[cfg (not(windows))] |
58 | { |
59 | // Decode from Python's lossless bytes string representation back into raw bytes |
60 | let fs_encoded_bytes = unsafe { |
61 | crate::Py::<crate::types::PyBytes>::from_owned_ptr( |
62 | ob.py(), |
63 | ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()), |
64 | ) |
65 | }; |
66 | |
67 | // Create an OsStr view into the raw bytes from Python |
68 | #[cfg (target_os = "wasi" )] |
69 | let os_str: &OsStr = std::os::wasi::ffi::OsStrExt::from_bytes( |
70 | fs_encoded_bytes.as_ref(ob.py()).as_bytes(), |
71 | ); |
72 | #[cfg (not(target_os = "wasi" ))] |
73 | let os_str: &OsStr = std::os::unix::ffi::OsStrExt::from_bytes( |
74 | fs_encoded_bytes.as_ref(ob.py()).as_bytes(), |
75 | ); |
76 | |
77 | Ok(os_str.to_os_string()) |
78 | } |
79 | |
80 | #[cfg (windows)] |
81 | { |
82 | // Take the quick and easy shortcut if UTF-8 |
83 | if let Ok(utf8_string) = pystring.to_str() { |
84 | return Ok(utf8_string.to_owned().into()); |
85 | } |
86 | |
87 | // Get an owned allocated wide char buffer from PyString, which we have to deallocate |
88 | // ourselves |
89 | let size = |
90 | unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) }; |
91 | crate::err::error_on_minusone(ob.py(), size)?; |
92 | |
93 | let mut buffer = vec![0; size as usize]; |
94 | let bytes_read = |
95 | unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) }; |
96 | assert_eq!(bytes_read, size); |
97 | |
98 | // Copy wide char buffer into OsString |
99 | let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer); |
100 | |
101 | Ok(os_string) |
102 | } |
103 | } |
104 | } |
105 | |
106 | impl IntoPy<PyObject> for &'_ OsStr { |
107 | #[inline ] |
108 | fn into_py(self, py: Python<'_>) -> PyObject { |
109 | self.to_object(py) |
110 | } |
111 | } |
112 | |
113 | impl ToPyObject for Cow<'_, OsStr> { |
114 | #[inline ] |
115 | fn to_object(&self, py: Python<'_>) -> PyObject { |
116 | (self as &OsStr).to_object(py) |
117 | } |
118 | } |
119 | |
120 | impl IntoPy<PyObject> for Cow<'_, OsStr> { |
121 | #[inline ] |
122 | fn into_py(self, py: Python<'_>) -> PyObject { |
123 | self.to_object(py) |
124 | } |
125 | } |
126 | |
127 | impl ToPyObject for OsString { |
128 | #[inline ] |
129 | fn to_object(&self, py: Python<'_>) -> PyObject { |
130 | (self as &OsStr).to_object(py) |
131 | } |
132 | } |
133 | |
134 | impl IntoPy<PyObject> for OsString { |
135 | fn into_py(self, py: Python<'_>) -> PyObject { |
136 | self.to_object(py) |
137 | } |
138 | } |
139 | |
140 | impl<'a> IntoPy<PyObject> for &'a OsString { |
141 | fn into_py(self, py: Python<'_>) -> PyObject { |
142 | self.to_object(py) |
143 | } |
144 | } |
145 | |
#[cfg(test)]
mod tests {
    use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject};
    use std::fmt::Debug;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// A byte sequence that is not valid UTF-8 must survive a trip into Python and back.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        Python::with_gil(|py| {
            #[cfg(not(target_os = "wasi"))]
            use std::os::unix::ffi::OsStrExt;
            #[cfg(target_os = "wasi")]
            use std::os::wasi::ffi::OsStrExt;

            // this is not valid UTF-8
            let payload = &[250, 251, 252, 253, 254, 255, 0, 255];
            let os_str = OsStr::from_bytes(payload);

            // do a roundtrip into Pythonland and back and compare
            let py_str: PyObject = os_str.into_py(py);
            let os_str_2: OsString = py_str.extract(py).unwrap();
            assert_eq!(os_str, os_str_2);
        });
    }

    /// Every `ToPyObject` implementation in this module yields a Python `str` that extracts
    /// back to an equal `OsString`.
    #[test]
    fn test_topyobject_roundtrip() {
        Python::with_gil(|py| {
            fn test_roundtrip<T: ToPyObject + AsRef<OsStr> + Debug>(py: Python<'_>, obj: T) {
                let pyobject = obj.to_object(py);
                let pystring: &PyString = pyobject.extract(py).unwrap();
                assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy());
                let roundtripped_obj: OsString = pystring.extract().unwrap();
                assert_eq!(obj.as_ref(), roundtripped_obj.as_os_str());
            }
            let os_str = OsStr::new("Hello \0\n🐍");
            test_roundtrip::<&OsStr>(py, os_str);
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(os_str));
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(os_str.to_os_string()));
            test_roundtrip::<OsString>(py, os_str.to_os_string());
        });
    }

    /// Every `IntoPy<PyObject>` implementation in this module (including the `Cow` one)
    /// yields a Python `str` that extracts back to an equal `OsString`.
    #[test]
    fn test_intopy_roundtrip() {
        Python::with_gil(|py| {
            fn test_roundtrip<T: IntoPy<PyObject> + AsRef<OsStr> + Debug + Clone>(
                py: Python<'_>,
                obj: T,
            ) {
                let pyobject = obj.clone().into_py(py);
                let pystring: &PyString = pyobject.extract(py).unwrap();
                assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy());
                let roundtripped_obj: OsString = pystring.extract().unwrap();
                // `assert_eq!` (rather than `assert!(a == b)`) prints both sides on failure.
                assert_eq!(obj.as_ref(), roundtripped_obj.as_os_str());
            }
            let os_str = OsStr::new("Hello \0\n🐍");
            test_roundtrip::<&OsStr>(py, os_str);
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(os_str));
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(os_str.to_os_string()));
            test_roundtrip::<OsString>(py, os_str.to_os_string());
            test_roundtrip::<&OsString>(py, &os_str.to_os_string());
        });
    }
}
213 | |