diff --git a/Cargo.lock b/Cargo.lock index 67e0813b7b..ec6e5f0295 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" [[package]] name = "approx" @@ -1757,7 +1757,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.1", - "zerocopy 0.8.18", + "zerocopy 0.8.20", ] [[package]] @@ -1796,7 +1796,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a88e0da7a2c97baa202165137c158d0a2e824ac465d13d81046727b34cb247d3" dependencies = [ "getrandom 0.3.1", - "zerocopy 0.8.18", + "zerocopy 0.8.20", ] [[package]] @@ -2282,6 +2282,7 @@ dependencies = [ "itertools 0.14.0", "junction", "libc", + "libffi", "libloading", "log", "malachite-bigint", @@ -2421,9 +2422,9 @@ checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" dependencies = [ "serde_derive", ] @@ -2452,9 +2453,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" dependencies = [ "proc-macro2", "quote", @@ -2463,9 +2464,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.138" +version = "1.0.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" dependencies = [ "itoa", "memchr", @@ -2937,9 +2938,9 @@ checksum = "623f59e6af2a98bdafeb93fa277ac8e1e40440973001ca15cf4ae1541cd16d56" [[package]] name = "unicode-ident" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" [[package]] name = "unicode-normalization" @@ -3449,11 +3450,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.18" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79386d31a42a4996e3336b0919ddb90f81112af416270cff95b5f5af22b839c2" +checksum = "dde3bb8c68a8f3f1ed4ac9221aad6b10cece3e60a8e2ea54a6a2dec806d0084c" dependencies = [ - "zerocopy-derive 0.8.18", + "zerocopy-derive 0.8.20", ] [[package]] @@ -3469,9 +3470,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.18" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76331675d372f91bf8d17e13afbd5fe639200b73d01f0fc748bb059f9cca2db7" +checksum = "eea57037071898bf96a6da35fd626f4f27e9cee3ead2a6c703cf09d472b2e700" dependencies = [ "proc-macro2", "quote", diff --git a/extra_tests/snippets/builtins_ctypes.py b/extra_tests/snippets/builtins_ctypes.py index 5bd6e5ef25..c5c563a48e 100644 --- a/extra_tests/snippets/builtins_ctypes.py +++ b/extra_tests/snippets/builtins_ctypes.py @@ -1,9 +1,29 @@ import os as _os, sys as _sys +import types as _types +from _ctypes import RTLD_LOCAL, RTLD_GLOBAL from _ctypes import sizeof from _ctypes import _SimpleCData +from _ctypes import CFuncPtr as _CFuncPtr + from struct import calcsize as _calcsize + +DEFAULT_MODE = RTLD_LOCAL +if _os.name == "posix" and _sys.platform == "darwin": + # On OS X 10.3, we use RTLD_GLOBAL as default mode + # because RTLD_LOCAL does not work at least on some + # libraries. OS X 10.3 is Darwin 7, so we check for + # that. + + if int(_os.uname().release.split('.')[0]) < 8: + DEFAULT_MODE = RTLD_GLOBAL + +from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \ + FUNCFLAG_PYTHONAPI as _FUNCFLAG_PYTHONAPI, \ + FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \ + FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR + def create_string_buffer(init, size=None): """create_string_buffer(aBytes) -> character array create_string_buffer(anInteger) -> character array @@ -131,3 +151,117 @@ class c_bool(_SimpleCData): # s = create_string_buffer(b'\000' * 32) assert i.value == 42 assert abs(f.value - 3.14) < 1e-06 + +if _os.name == "nt": + from _ctypes import LoadLibrary as _dlopen + from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL +elif _os.name == "posix": + from _ctypes import dlopen as _dlopen + +class CDLL(object): + """An instance of this class represents a loaded dll/shared + library, exporting functions using the standard C calling + convention (named 'cdecl' on Windows). + + The exported functions can be accessed as attributes, or by + indexing with the function name. Examples: + + .qsort -> callable object + ['qsort'] -> callable object + + Calling the functions releases the Python GIL during the call and + reacquires it afterwards. + """ + _func_flags_ = _FUNCFLAG_CDECL + _func_restype_ = c_int + # default values for repr + _name = '' + _handle = 0 + _FuncPtr = None + + def __init__(self, name, mode=DEFAULT_MODE, handle=None, + use_errno=False, + use_last_error=False, + winmode=None): + self._name = name + flags = self._func_flags_ + if use_errno: + flags |= _FUNCFLAG_USE_ERRNO + if use_last_error: + flags |= _FUNCFLAG_USE_LASTERROR + if _sys.platform.startswith("aix"): + """When the name contains ".a(" and ends with ")", + e.g., "libFOO.a(libFOO.so)" - this is taken to be an + archive(member) syntax for dlopen(), and the mode is adjusted. + Otherwise, name is presented to dlopen() as a file argument. + """ + if name and name.endswith(")") and ".a(" in name: + mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW ) + if _os.name == "nt": + if winmode is not None: + mode = winmode + else: + import nt + mode = 4096 + if '/' in name or '\\' in name: + self._name = nt._getfullpathname(self._name) + mode |= nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR + + class _FuncPtr(_CFuncPtr): + _flags_ = flags + _restype_ = self._func_restype_ + self._FuncPtr = _FuncPtr + + if handle is None: + self._handle = _dlopen(self._name, mode) + else: + self._handle = handle + + def __repr__(self): + return "<%s '%s', handle %x at %#x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxsize*2 + 1)), + id(self) & (_sys.maxsize*2 + 1)) + + def __getattr__(self, name): + if name.startswith('__') and name.endswith('__'): + raise AttributeError(name) + func = self.__getitem__(name) + setattr(self, name, func) + return func + + def __getitem__(self, name_or_ordinal): + func = self._FuncPtr((name_or_ordinal, self)) + if not isinstance(name_or_ordinal, int): + func.__name__ = name_or_ordinal + return func + +class LibraryLoader(object): + def __init__(self, dlltype): + self._dlltype = dlltype + + def __getattr__(self, name): + if name[0] == '_': + raise AttributeError(name) + try: + dll = self._dlltype(name) + except OSError: + raise AttributeError(name) + setattr(self, name, dll) + return dll + + def __getitem__(self, name): + return getattr(self, name) + + def LoadLibrary(self, name): + return self._dlltype(name) + + __class_getitem__ = classmethod(_types.GenericAlias) + +cdll = LibraryLoader(CDLL) + +if _os.name == "posix" or _sys.platform == "darwin": + pass +else: + libc = cdll.msvcrt + print("rand", libc.rand()) diff --git a/vm/Cargo.toml b/vm/Cargo.toml index acc645bb74..330a2beab5 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -100,9 +100,12 @@ uname = "0.1.1" rustyline = { workspace = true } which = "6" errno = "0.3" -libloading = "0.8" widestring = { workspace = true } +[target.'cfg(all(any(target_os = "linux", target_os = "macos", target_os = "windows"), not(any(target_env = "musl", target_env = "sgx"))))'.dependencies] +libffi = "3.2" +libloading = "0.8" + [target.'cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))'.dependencies] num_cpus = "1.13.1" diff --git a/vm/src/stdlib/ctypes.rs b/vm/src/stdlib/ctypes.rs index 701094a375..99866bae70 100644 --- a/vm/src/stdlib/ctypes.rs +++ b/vm/src/stdlib/ctypes.rs @@ -37,7 +37,7 @@ pub(crate) mod _ctypes { use super::base::PyCSimple; use crate::builtins::PyTypeRef; use crate::class::StaticType; - use crate::function::Either; + use crate::function::{Either, OptionalArg}; use crate::stdlib::ctypes::library; use crate::{AsObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine}; use crossbeam_utils::atomic::AtomicCell; @@ -180,12 +180,31 @@ pub(crate) mod _ctypes { } #[pyfunction(name = "LoadLibrary")] - fn load_library(name: String, vm: &VirtualMachine) -> PyResult { + fn load_library_windows( + name: String, + _load_flags: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { + // TODO: audit functions first + // TODO: load_flags + let cache = library::libcache(); + let mut cache_write = cache.write(); + let (id, _) = cache_write.get_or_insert_lib(&name, vm).unwrap(); + Ok(id) + } + + #[pyfunction(name = "dlopen")] + fn load_library_unix( + name: String, + _load_flags: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { // TODO: audit functions first + // TODO: load_flags let cache = library::libcache(); let mut cache_write = cache.write(); - let lib_ref = cache_write.get_or_insert_lib(&name, vm).unwrap(); - Ok(lib_ref.get_pointer()) + let (id, _) = cache_write.get_or_insert_lib(&name, vm).unwrap(); + Ok(id) } #[pyfunction(name = "FreeLibrary")] diff --git a/vm/src/stdlib/ctypes/base.rs b/vm/src/stdlib/ctypes/base.rs index a4147c62b2..5c5396be29 100644 --- a/vm/src/stdlib/ctypes/base.rs +++ b/vm/src/stdlib/ctypes/base.rs @@ -10,6 +10,31 @@ use num_traits::ToPrimitive; use rustpython_common::lock::PyRwLock; use std::fmt::Debug; +pub fn ffi_type_from_str(_type_: &str) -> Option { + match _type_ { + "c" => Some(libffi::middle::Type::u8()), + "u" => Some(libffi::middle::Type::u32()), + "b" => Some(libffi::middle::Type::i8()), + "B" => Some(libffi::middle::Type::u8()), + "h" => Some(libffi::middle::Type::i16()), + "H" => Some(libffi::middle::Type::u16()), + "i" => Some(libffi::middle::Type::i32()), + "I" => Some(libffi::middle::Type::u32()), + "l" => Some(libffi::middle::Type::i32()), + "L" => Some(libffi::middle::Type::u32()), + "q" => Some(libffi::middle::Type::i64()), + "Q" => Some(libffi::middle::Type::u64()), + "f" => Some(libffi::middle::Type::f32()), + "d" => Some(libffi::middle::Type::f64()), + "g" => Some(libffi::middle::Type::f64()), + "?" => Some(libffi::middle::Type::u8()), + "z" => Some(libffi::middle::Type::u64()), + "Z" => Some(libffi::middle::Type::u64()), + "P" => Some(libffi::middle::Type::u64()), + _ => None, + } +} + #[allow(dead_code)] fn set_primitive(_type_: &str, value: &PyObjectRef, vm: &VirtualMachine) -> PyResult { match _type_ { diff --git a/vm/src/stdlib/ctypes/function.rs b/vm/src/stdlib/ctypes/function.rs index a7ee07744b..7d8dc0386a 100644 --- a/vm/src/stdlib/ctypes/function.rs +++ b/vm/src/stdlib/ctypes/function.rs @@ -1,24 +1,176 @@ -use crate::PyObjectRef; +use crate::builtins::{PyStr, PyTupleRef, PyTypeRef}; +use crate::convert::ToPyObject; +use crate::function::FuncArgs; use crate::stdlib::ctypes::PyCData; +use crate::stdlib::ctypes::base::{PyCSimple, ffi_type_from_str}; +use crate::types::{Callable, Constructor}; +use crate::{Py, PyObjectRef, PyResult, VirtualMachine}; use crossbeam_utils::atomic::AtomicCell; +use libffi::middle::{Arg, Cif, CodePtr, Type}; +use libloading::Symbol; +use num_traits::ToPrimitive; use rustpython_common::lock::PyRwLock; -use std::ffi::c_void; +use std::fmt::Debug; + +// https://github.com/python/cpython/blob/4f8bb3947cfbc20f970ff9d9531e1132a9e95396/Modules/_ctypes/callproc.c#L15 #[derive(Debug)] pub struct Function { - _pointer: *mut c_void, - _arguments: Vec<()>, - _return_type: Box<()>, + // TODO: no protection from use-after-free + pointer: CodePtr, + cif: Cif, +} + +unsafe impl Send for Function {} +unsafe impl Sync for Function {} + +type FP = unsafe extern "C" fn(); + +impl Function { + pub unsafe fn load( + library: &libloading::Library, + function: &str, + args: &[PyObjectRef], + ret_type: &Option, + vm: &VirtualMachine, + ) -> PyResult { + // map each arg to a PyCSimple + let args = args + .iter() + .map(|arg| { + if let Some(data) = arg.downcast_ref::() { + Ok(ffi_type_from_str(&data._type_).unwrap()) + } else { + Err(vm.new_type_error("Expected a ctypes simple type".to_string())) + } + }) + .collect::>>()?; + let terminated = format!("{}\0", function); + let pointer: Symbol<'_, FP> = unsafe { + library + .get(terminated.as_bytes()) + .map_err(|err| err.to_string()) + .map_err(|err| vm.new_value_error(err))? + }; + let code_ptr = CodePtr(*pointer as *mut _); + let return_type = match ret_type { + // TODO: Fix this + Some(_t) => { + return Err(vm.new_not_implemented_error("Return type not implemented".to_string())); + } + None => Type::c_int(), + }; + let cif = Cif::new(args, return_type); + Ok(Function { + cif, + pointer: code_ptr, + }) + } + + pub unsafe fn call( + &self, + args: Vec, + vm: &VirtualMachine, + ) -> PyResult { + let args = args + .into_iter() + .map(|arg| { + if let Some(data) = arg.downcast_ref::() { + dbg!(&data); + todo!("HANDLE ARGUMENTS") + } else { + Err(vm.new_type_error("Expected a ctypes simple type".to_string())) + } + }) + .collect::>>()?; + // TODO: FIX return type + let result: i32 = unsafe { self.cif.call(self.pointer, &args) }; + Ok(vm.ctx.new_int(result).into()) + } } #[pyclass(module = "_ctypes", name = "CFuncPtr", base = "PyCData")] +#[derive(PyPayload)] pub struct PyCFuncPtr { - pub _name_: String, - pub _argtypes_: AtomicCell>, - pub _restype_: AtomicCell, - _handle: PyObjectRef, - _f: PyRwLock, + pub name: PyRwLock, + pub _flags_: AtomicCell, + // FIXME(arihant2math): This shouldn't be an option, setting the default as the none type should work + // This is a workaround for now and I'll fix it later + pub _restype_: PyRwLock>, + pub handler: PyObjectRef, +} + +impl Debug for PyCFuncPtr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PyCFuncPtr") + .field("name", &self.name) + .finish() + } } -#[pyclass] -impl PyCFuncPtr {} +impl Constructor for PyCFuncPtr { + type Args = (PyTupleRef, FuncArgs); + + fn py_new(_cls: PyTypeRef, (tuple, _args): Self::Args, vm: &VirtualMachine) -> PyResult { + let name = tuple + .first() + .ok_or(vm.new_type_error("Expected a tuple with at least 2 elements".to_string()))? + .downcast_ref::() + .ok_or(vm.new_type_error("Expected a string".to_string()))? + .to_string(); + let handler = tuple + .into_iter() + .nth(1) + .ok_or(vm.new_type_error("Expected a tuple with at least 2 elements".to_string()))? + .clone(); + Ok(Self { + _flags_: AtomicCell::new(0), + name: PyRwLock::new(name), + _restype_: PyRwLock::new(None), + handler, + } + .to_pyobject(vm)) + } +} + +impl Callable for PyCFuncPtr { + type Args = FuncArgs; + fn call(zelf: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { + unsafe { + let handle = zelf.handler.get_attr("_handle", vm)?; + let handle = handle.try_int(vm)?.as_bigint().clone(); + let library_cache = crate::stdlib::ctypes::library::libcache().read(); + let library = library_cache + .get_lib( + handle + .to_usize() + .ok_or(vm.new_value_error("Invalid handle".to_string()))?, + ) + .ok_or_else(|| vm.new_value_error("Library not found".to_string()))?; + let inner_lib = library.lib.lock(); + let name = zelf.name.read(); + let res_type = zelf._restype_.read(); + let func = Function::load( + inner_lib.as_ref().unwrap(), + &name, + &args.args, + &res_type, + vm, + )?; + func.call(args.args, vm) + } + } +} + +#[pyclass(flags(BASETYPE), with(Callable, Constructor))] +impl PyCFuncPtr { + #[pygetset(magic)] + fn name(&self) -> String { + self.name.read().clone() + } + + #[pygetset(setter, magic)] + fn set_name(&self, name: String) { + *self.name.write() = name; + } +} diff --git a/vm/src/stdlib/ctypes/library.rs b/vm/src/stdlib/ctypes/library.rs index f777d26bc0..74a601a488 100644 --- a/vm/src/stdlib/ctypes/library.rs +++ b/vm/src/stdlib/ctypes/library.rs @@ -1,14 +1,13 @@ use crate::VirtualMachine; -use crossbeam_utils::atomic::AtomicCell; use libloading::Library; -use rustpython_common::lock::PyRwLock; +use rustpython_common::lock::{PyMutex, PyRwLock}; use std::collections::HashMap; use std::ffi::c_void; use std::fmt; use std::ptr::null; pub struct SharedLibrary { - lib: AtomicCell>, + pub(crate) lib: PyMutex>, } impl fmt::Debug for SharedLibrary { @@ -20,26 +19,13 @@ impl fmt::Debug for SharedLibrary { impl SharedLibrary { pub fn new(name: &str) -> Result { Ok(SharedLibrary { - lib: AtomicCell::new(Some(unsafe { Library::new(name)? })), + lib: PyMutex::new(unsafe { Some(Library::new(name)?) }), }) } - #[allow(dead_code)] - pub fn get_sym(&self, name: &str) -> Result<*mut c_void, String> { - if let Some(inner) = unsafe { &*self.lib.as_ptr() } { - unsafe { - inner - .get(name.as_bytes()) - .map(|f: libloading::Symbol<'_, *mut c_void>| *f) - .map_err(|err| err.to_string()) - } - } else { - Err("The library has been closed".to_string()) - } - } - pub fn get_pointer(&self) -> usize { - if let Some(l) = unsafe { &*self.lib.as_ptr() } { + let lib_lock = self.lib.lock(); + if let Some(l) = &*lib_lock { l as *const Library as usize } else { null::() as usize @@ -47,13 +33,12 @@ impl SharedLibrary { } pub fn is_closed(&self) -> bool { - unsafe { &*self.lib.as_ptr() }.is_none() + let lib_lock = self.lib.lock(); + lib_lock.is_none() } pub fn close(&self) { - let old = self.lib.take(); - self.lib.store(None); - drop(old); + *self.lib.lock() = None; } } @@ -83,7 +68,7 @@ impl ExternalLibs { &mut self, library_path: &str, _vm: &VirtualMachine, - ) -> Result<&SharedLibrary, libloading::Error> { + ) -> Result<(usize, &SharedLibrary), libloading::Error> { let nlib = SharedLibrary::new(library_path)?; let key = nlib.get_pointer(); @@ -98,7 +83,7 @@ impl ExternalLibs { } }; - Ok(self.libraries.get(&key).unwrap()) + Ok((key, self.libraries.get(&key).unwrap())) } pub fn drop_lib(&mut self, key: usize) { diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index 529a40e861..a3180c1c0f 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -37,7 +37,10 @@ pub mod posix; #[path = "posix_compat.rs"] pub mod posix; -#[cfg(any(target_family = "unix", target_family = "windows"))] +#[cfg(all( + any(target_os = "linux", target_os = "macos", target_os = "windows"), + not(any(target_env = "musl", target_env = "sgx")) +))] mod ctypes; #[cfg(windows)] pub(crate) mod msvcrt; @@ -126,7 +129,10 @@ pub fn get_module_inits() -> StdlibMap { "_winapi" => winapi::make_module, "winreg" => winreg::make_module, } - #[cfg(any(target_family = "unix", target_family = "windows"))] + #[cfg(all( + any(target_os = "linux", target_os = "macos", target_os = "windows"), + not(any(target_env = "musl", target_env = "sgx")) + ))] { "_ctypes" => ctypes::make_module, }