Implement fsencode/fsdecode for FsPath · RustPython/RustPython@8ed29d1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8ed29d1

Browse files
committed
Implement fsencode/fsdecode for FsPath
1 parent ffa99f7 commit 8ed29d1

File tree

11 files changed

+111
-82
lines changed

11 files changed

+111
-82
lines changed

Lib/test/test_socket.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,7 +1578,6 @@ def test_getnameinfo(self):
15781578
# only IP addresses are allowed
15791579
self.assertRaises(OSError, socket.getnameinfo, ('mail.python.org',0), 0)
15801580

1581-
@unittest.expectedFailureIf(sys.platform != "darwin", "TODO: RUSTPYTHON; socket.gethostbyname_ex")
15821581
@unittest.skipUnless(support.is_resource_enabled('network'),
15831582
'network is not enabled')
15841583
def test_idna(self):
@@ -5519,8 +5518,6 @@ def testBytesAddr(self):
55195518
self.addCleanup(os_helper.unlink, path)
55205519
self.assertEqual(self.sock.getsockname(), path)
55215520

5522-
# TODO: RUSTPYTHON, surrogateescape
5523-
@unittest.expectedFailure
55245521
def testSurrogateescapeBind(self):
55255522
# Test binding to a valid non-ASCII pathname, with the
55265523
# non-ASCII bytes supplied using surrogateescape encoding.

stdlib/src/socket.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -930,10 +930,15 @@ mod _socket {
930930
match family {
931931
#[cfg(unix)]
932932
c::AF_UNIX => {
933+
use crate::vm::function::ArgStrOrBytesLike;
933934
use std::os::unix::ffi::OsStrExt;
934-
let buf = crate::vm::function::ArgStrOrBytesLike::try_from_object(vm, addr)?;
935-
let path = &*buf.borrow_bytes();
936-
socket2::SockAddr::unix(ffi::OsStr::from_bytes(path))
935+
let buf = ArgStrOrBytesLike::try_from_object(vm, addr)?;
936+
let bytes = &*buf.borrow_bytes();
937+
let path = match &buf {
938+
ArgStrOrBytesLike::Buf(_) => ffi::OsStr::from_bytes(bytes).into(),
939+
ArgStrOrBytesLike::Str(s) => vm.fsencode(s)?,
940+
};
941+
socket2::SockAddr::unix(path)
937942
.map_err(|_| vm.new_os_error("AF_UNIX path too long".to_owned()).into())
938943
}
939944
c::AF_INET => {
@@ -1704,7 +1709,7 @@ mod _socket {
17041709
let path = ffi::OsStr::as_bytes(addr.as_pathname().unwrap_or("".as_ref()).as_ref());
17051710
let nul_pos = memchr::memchr(b'\0', path).unwrap_or(path.len());
17061711
let path = ffi::OsStr::from_bytes(&path[..nul_pos]);
1707-
return vm.ctx.new_str(path.to_string_lossy()).into();
1712+
return vm.fsdecode(path).into();
17081713
}
17091714
// TODO: support more address families
17101715
(String::new(), 0).to_pyobject(vm)

vm/src/function/fspath.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
function::PyStr,
66
protocol::PyBuffer,
77
};
8-
use std::{ffi::OsStr, path::PathBuf};
8+
use std::{borrow::Cow, ffi::OsStr, path::PathBuf};
99

1010
#[derive(Clone)]
1111
pub enum FsPath {
@@ -58,15 +58,11 @@ impl FsPath {
5858
})
5959
}
6060

61-
pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<&OsStr> {
61+
pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<Cow<'_, OsStr>> {
6262
// TODO: FS encodings
6363
match self {
64-
FsPath::Str(s) => {
65-
// XXX RUSTPYTHON: this is sketchy on windows; it's not guaranteed that its
66-
// OsStr encoding will always be compatible with WTF-8.
67-
Ok(unsafe { OsStr::from_encoded_bytes_unchecked(s.as_wtf8().as_bytes()) })
68-
}
69-
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm),
64+
FsPath::Str(s) => vm.fsencode(s),
65+
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm).map(Cow::Borrowed),
7066
}
7167
}
7268

vm/src/ospath.rs

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,28 +21,14 @@ pub(super) enum OutputMode {
2121
}
2222

2323
impl OutputMode {
24-
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyResult {
25-
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyResult {
26-
let path_as_string = |p: PathBuf| {
27-
p.into_os_string().into_string().map_err(|_| {
28-
vm.new_unicode_decode_error(
29-
"Can't convert OS path to valid UTF-8 string".into(),
30-
)
31-
})
32-
};
24+
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyObjectRef {
25+
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyObjectRef {
3326
match mode {
34-
OutputMode::String => path_as_string(path).map(|s| vm.ctx.new_str(s).into()),
35-
OutputMode::Bytes => {
36-
#[cfg(any(unix, target_os = "wasi"))]
37-
{
38-
use rustpython_common::os::ffi::OsStringExt;
39-
Ok(vm.ctx.new_bytes(path.into_os_string().into_vec()).into())
40-
}
41-
#[cfg(windows)]
42-
{
43-
path_as_string(path).map(|s| vm.ctx.new_bytes(s.into_bytes()).into())
44-
}
45-
}
27+
OutputMode::String => vm.fsdecode(path).into(),
28+
OutputMode::Bytes => vm
29+
.ctx
30+
.new_bytes(path.into_os_string().into_encoded_bytes())
31+
.into(),
4632
}
4733
}
4834
inner(self, path.into(), vm)
@@ -59,7 +45,7 @@ impl OsPath {
5945
}
6046

6147
pub(crate) fn from_fspath(fspath: FsPath, vm: &VirtualMachine) -> PyResult<OsPath> {
62-
let path = fspath.as_os_str(vm)?.to_owned();
48+
let path = fspath.as_os_str(vm)?.into_owned();
6349
let mode = match fspath {
6450
FsPath::Str(_) => OutputMode::String,
6551
FsPath::Bytes(_) => OutputMode::Bytes,
@@ -88,7 +74,7 @@ impl OsPath {
8874
widestring::WideCString::from_os_str(&self.path).map_err(|err| err.to_pyexception(vm))
8975
}
9076

91-
pub fn filename(&self, vm: &VirtualMachine) -> PyResult {
77+
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
9278
self.mode.process_path(self.path.clone(), vm)
9379
}
9480
}
@@ -133,7 +119,7 @@ impl From<OsPath> for OsPathOrFd {
133119
impl OsPathOrFd {
134120
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
135121
match self {
136-
OsPathOrFd::Path(path) => path.filename(vm).unwrap_or_else(|_| vm.ctx.none()),
122+
OsPathOrFd::Path(path) => path.filename(vm),
137123
OsPathOrFd::Fd(fd) => vm.ctx.new_int(*fd).into(),
138124
}
139125
}

vm/src/stdlib/codecs.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,12 @@ mod _codecs {
312312

313313
#[pyfunction]
314314
fn utf_8_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult {
315-
if args.s.is_utf8() {
315+
if args.s.is_utf8()
316+
|| args
317+
.errors
318+
.as_ref()
319+
.is_some_and(|s| s.is(identifier!(vm, surrogatepass)))
320+
{
316321
return Ok((args.s.as_bytes().to_vec(), args.s.byte_len()));
317322
}
318323
do_codec!(utf8::encode, args, vm)

vm/src/stdlib/io.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,7 +2225,7 @@ mod _io {
22252225
*data = None;
22262226

22272227
let encoding = match args.encoding {
2228-
None if vm.state.settings.utf8_mode > 0 => PyStr::from("utf-8").into_ref(&vm.ctx),
2228+
None if vm.state.settings.utf8_mode > 0 => identifier!(vm, utf_8).to_owned(),
22292229
Some(enc) if enc.as_wtf8() != "locale" => enc,
22302230
_ => {
22312231
// None without utf8_mode or "locale" encoding
@@ -2238,7 +2238,7 @@ mod _io {
22382238

22392239
let errors = args
22402240
.errors
2241-
.unwrap_or_else(|| PyStr::from("strict").into_ref(&vm.ctx));
2241+
.unwrap_or_else(|| identifier!(vm, strict).to_owned());
22422242

22432243
let has_read1 = vm.get_attribute_opt(buffer.clone(), "read1")?.is_some();
22442244
let seekable = vm.call_method(&buffer, "seekable", ())?.try_to_bool(vm)?;

vm/src/stdlib/nt.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ pub(crate) mod module {
246246
.as_ref()
247247
.canonicalize()
248248
.map_err(|e| e.to_pyexception(vm))?;
249-
path.mode.process_path(real, vm)
249+
Ok(path.mode.process_path(real, vm))
250250
}
251251

252252
#[pyfunction]
@@ -279,7 +279,7 @@ pub(crate) mod module {
279279
}
280280
}
281281
let buffer = widestring::WideCString::from_vec_truncate(buffer);
282-
path.mode.process_path(buffer.to_os_string(), vm)
282+
Ok(path.mode.process_path(buffer.to_os_string(), vm))
283283
}
284284

285285
#[pyfunction]
@@ -294,7 +294,7 @@ pub(crate) mod module {
294294
return Err(errno_err(vm));
295295
}
296296
let buffer = widestring::WideCString::from_vec_truncate(buffer);
297-
path.mode.process_path(buffer.to_os_string(), vm)
297+
Ok(path.mode.process_path(buffer.to_os_string(), vm))
298298
}
299299

300300
#[pyfunction]

vm/src/stdlib/os.rs

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ pub(super) mod _os {
332332
};
333333
dir_iter
334334
.map(|entry| match entry {
335-
Ok(entry_path) => path.mode.process_path(entry_path.file_name(), vm),
335+
Ok(entry_path) => Ok(path.mode.process_path(entry_path.file_name(), vm)),
336336
Err(err) => Err(IOErrorBuilder::with_filename(&err, path.clone(), vm)),
337337
})
338338
.collect::<PyResult<_>>()?
@@ -352,22 +352,18 @@ pub(super) mod _os {
352352
let mut dir =
353353
nix::dir::Dir::from_fd(new_fd).map_err(|e| e.into_pyexception(vm))?;
354354
dir.iter()
355-
.filter_map(|entry| {
356-
entry
357-
.map_err(|e| e.into_pyexception(vm))
358-
.and_then(|entry| {
359-
let fname = entry.file_name().to_bytes();
360-
Ok(match fname {
361-
b"." | b".." => None,
362-
_ => Some(
363-
OutputMode::String
364-
.process_path(ffi::OsStr::from_bytes(fname), vm)?,
365-
),
366-
})
367-
})
368-
.transpose()
355+
.filter_map_ok(|entry| {
356+
let fname = entry.file_name().to_bytes();
357+
match fname {
358+
b"." | b".." => None,
359+
_ => Some(
360+
OutputMode::String
361+
.process_path(ffi::OsStr::from_bytes(fname), vm),
362+
),
363+
}
369364
})
370-
.collect::<PyResult<_>>()?
365+
.collect::<Result<_, _>>()
366+
.map_err(|e| e.into_pyexception(vm))?
371367
}
372368
}
373369
};
@@ -429,7 +425,7 @@ pub(super) mod _os {
429425
let [] = dir_fd.0;
430426
let path =
431427
fs::read_link(&path).map_err(|err| IOErrorBuilder::with_filename(&err, path, vm))?;
432-
mode.process_path(path, vm)
428+
Ok(mode.process_path(path, vm))
433429
}
434430

435431
#[pyattr]
@@ -452,12 +448,12 @@ pub(super) mod _os {
452448
impl DirEntry {
453449
#[pygetset]
454450
fn name(&self, vm: &VirtualMachine) -> PyResult {
455-
self.mode.process_path(&self.file_name, vm)
451+
Ok(self.mode.process_path(&self.file_name, vm))
456452
}
457453

458454
#[pygetset]
459455
fn path(&self, vm: &VirtualMachine) -> PyResult {
460-
self.mode.process_path(&self.pathval, vm)
456+
Ok(self.mode.process_path(&self.pathval, vm))
461457
}
462458

463459
fn perform_on_metadata(
@@ -908,12 +904,12 @@ pub(super) mod _os {
908904

909905
#[pyfunction]
910906
fn getcwd(vm: &VirtualMachine) -> PyResult {
911-
OutputMode::String.process_path(curdir_inner(vm)?, vm)
907+
Ok(OutputMode::String.process_path(curdir_inner(vm)?, vm))
912908
}
913909

914910
#[pyfunction]
915911
fn getcwdb(vm: &VirtualMachine) -> PyResult {
916-
OutputMode::Bytes.process_path(curdir_inner(vm)?, vm)
912+
Ok(OutputMode::Bytes.process_path(curdir_inner(vm)?, vm))
917913
}
918914

919915
#[pyfunction]

vm/src/stdlib/sys.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -458,21 +458,13 @@ mod sys {
458458
}
459459

460460
#[pyfunction]
461-
fn getfilesystemencoding(_vm: &VirtualMachine) -> String {
462-
// TODO: implement non-utf-8 mode.
463-
"utf-8".to_owned()
461+
fn getfilesystemencoding(vm: &VirtualMachine) -> PyStrRef {
462+
vm.fs_encoding().to_owned()
464463
}
465464

466-
#[cfg(not(windows))]
467465
#[pyfunction]
468-
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
469-
"surrogateescape".to_owned()
470-
}
471-
472-
#[cfg(windows)]
473-
#[pyfunction]
474-
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
475-
"surrogatepass".to_owned()
466+
fn getfilesystemencodeerrors(vm: &VirtualMachine) -> PyStrRef {
467+
vm.fs_encode_errors().to_owned()
476468
}
477469

478470
#[pyfunction]

vm/src/vm/context.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ pub struct Context {
5151
}
5252

5353
macro_rules! declare_const_name {
54-
($($name:ident,)*) => {
54+
($($name:ident$(: $s:literal)?,)*) => {
5555
#[derive(Debug, Clone, Copy)]
5656
#[allow(non_snake_case)]
5757
pub struct ConstName {
@@ -61,11 +61,13 @@ macro_rules! declare_const_name {
6161
impl ConstName {
6262
unsafe fn new(pool: &StringPool, typ: &PyTypeRef) -> Self {
6363
Self {
64-
$($name: unsafe { pool.intern(stringify!($name), typ.clone()) },)*
64+
$($name: unsafe { pool.intern(declare_const_name!(@string $name $($s)?), typ.clone()) },)*
6565
}
6666
}
6767
}
68-
}
68+
};
69+
(@string $name:ident) => { stringify!($name) };
70+
(@string $name:ident $string:literal) => { $string };
6971
}
7072

7173
declare_const_name! {
@@ -236,6 +238,15 @@ declare_const_name! {
236238
flush,
237239
close,
238240
WarningMessage,
241+
strict,
242+
ignore,
243+
replace,
244+
xmlcharrefreplace,
245+
backslashreplace,
246+
namereplace,
247+
surrogatepass,
248+
surrogateescape,
249+
utf_8: "utf-8",
239250
}
240251

241252
// Basic objects:

0 commit comments

Comments
 (0)
0