Add localeconv function to locale module by minhrongcon2000 · Pull Request #4558 · RustPython/RustPython · GitHub
[go: up one dir, main page]

Skip to content

Add localeconv function to locale module #4558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b3e39a2
Add localeconv function to locale module
minhrongcon2000 Feb 24, 2023
1ed1b29
Fix potential infinite loop
minhrongcon2000 Feb 24, 2023
7e9390c
Skip locale test
minhrongcon2000 Feb 24, 2023
b2c1e5f
Refactor and add platform def for localeconv
minhrongcon2000 Feb 24, 2023
0dd95e0
Add missing platform def on constant
minhrongcon2000 Feb 24, 2023
65f5036
Fix wrong format
minhrongcon2000 Feb 24, 2023
359c696
Fix platform dependent error build
minhrongcon2000 Feb 24, 2023
1533f7b
Add platform def at the top of locale module
minhrongcon2000 Feb 24, 2023
09c750c
Refactor code
minhrongcon2000 Feb 24, 2023
844a30a
Fix mismatch typing
minhrongcon2000 Feb 24, 2023
26f103a
Use libc::c_char instead
minhrongcon2000 Feb 24, 2023
9374005
Fix skip test reason for locale
minhrongcon2000 Feb 24, 2023
757545f
Add setlocale function
minhrongcon2000 Feb 24, 2023
184f891
Merge commit '757545fe37c347c75a5734e0fa9ef9d13a3bcce8' into fix/add-…
minhrongcon2000 Feb 25, 2023
ecc21e8
Fix clippy requirements
minhrongcon2000 Feb 25, 2023
973d2b2
Fix rustfmt issues
minhrongcon2000 Feb 25, 2023
2d37933
Remove test_locale skip testcase
minhrongcon2000 Feb 25, 2023
082d1ef
Fix unittest
minhrongcon2000 Feb 25, 2023
f5730c4
Fix clippy issues
minhrongcon2000 Feb 25, 2023
31941a4
Clean up locale module code
minhrongcon2000 Feb 25, 2023
0c5ecb5
Remove setlocale test expected failure
minhrongcon2000 Feb 25, 2023
26f04e2
Skip format and strcol test case
minhrongcon2000 Feb 28, 2023
0268af5
Fix skpTest bug
minhrongcon2000 Feb 28, 2023
44a36af
Skip locale test for window
minhrongcon2000 Feb 28, 2023
9801409
Fix test locale skip convention
minhrongcon2000 Feb 28, 2023
fcfa35a
Revert original skip decorator and polish code
minhrongcon2000 Feb 28, 2023
a44a593
Allow test_strcoll_3303
minhrongcon2000 Feb 28, 2023
9e5be11
Expect failure instead of skip
minhrongcon2000 Feb 28, 2023
6291ca9
Update Lib/test/test_locale.py
youknowone Feb 28, 2023
38d2253
clean up and additional error handling for NulError
youknowone Feb 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions Lib/test/test_locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ def setUp(self):
is_emscripten or is_wasi,
"musl libc issue on Emscripten/WASI, bpo-46390"
)
@unittest.skip("TODO: RUSTPYTHON")
def test_strcoll_with_diacritic(self):
self.assertLess(locale.strcoll('à', 'b'), 0)

Expand All @@ -390,6 +391,7 @@ def test_strcoll_with_diacritic(self):
is_emscripten or is_wasi,
"musl libc issue on Emscripten/WASI, bpo-46390"
)
@unittest.skip("TODO: RUSTPYTHON")
def test_strxfrm_with_diacritic(self):
self.assertLess(locale.strxfrm('à'), locale.strxfrm('b'))

Expand Down Expand Up @@ -506,12 +508,11 @@ def test_japanese(self):


class TestMiscellaneous(unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_defaults_UTF8(self):
# Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
# valid. Furthermore LC_CTYPE=UTF is used by the UTF-8 locale coercing
# during interpreter startup (on macOS).

import _locale
import os

Expand Down Expand Up @@ -544,6 +545,10 @@ def test_defaults_UTF8(self):

if orig_getlocale is not None:
_locale._getdefaultlocale = orig_getlocale

# TODO: RUSTPYTHON
if sys.platform == "win32":
test_defaults_UTF8 = unittest.expectedFailure(test_defaults_UTF8)

def test_getencoding(self):
# Invoke getencoding to make sure it does not cause exceptions.
Expand All @@ -560,13 +565,12 @@ def test_getpreferredencoding(self):
# If encoding non-empty, make sure it is valid
codecs.lookup(enc)

@unittest.skip("TODO: RustPython strcoll has not been implemented")
def test_strcoll_3303(self):
# test crasher from bug #3303
self.assertRaises(TypeError, locale.strcoll, "a", None)
self.assertRaises(TypeError, locale.strcoll, b"a", None)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_setlocale_category(self):
locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_TIME)
Expand All @@ -577,6 +581,10 @@ def test_setlocale_category(self):

# crasher from bug #7419
self.assertRaises(locale.Error, locale.setlocale, 12345)

# TODO: RUSTPYTHON
if sys.platform == "win32":
test_setlocale_category = unittest.expectedFailure(test_setlocale_category)

def test_getsetlocale_issue1813(self):
# Issue #1813: setting and getting the locale under a Turkish locale
Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ def test_float__format__locale(self):
self.assertEqual(locale.format_string('%g', x, grouping=True), format(x, 'n'))
self.assertEqual(locale.format_string('%.10g', x, grouping=True), format(x, '.10n'))

@unittest.skip("TODO: RustPython format code n is not integrated with locale")
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_int__format__locale(self):
# test locale support for __format__ code 'n' for integers
Expand Down
5 changes: 5 additions & 0 deletions stdlib/src/lib.rs
8000
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod dis;
mod gc;
mod hashlib;
mod json;
mod locale;
mod math;
#[cfg(unix)]
mod mmap;
Expand Down Expand Up @@ -159,5 +160,9 @@ pub fn get_module_inits() -> impl Iterator<Item = (Cow<'static, str>, StdlibInit
{
"_uuid" => uuid::make_module,
}
#[cfg(all(unix, not(any(target_os = "ios", target_os = "android"))))]
{
"_locale" => locale::make_module,
}
}
}
145 changes: 145 additions & 0 deletions stdlib/src/locale.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#[cfg(all(unix, not(any(target_os = "ios", target_os = "android"))))]
pub(crate) use _locale::make_module;

#[cfg(all(unix, not(any(target_os = "ios", target_os = "android"))))]
#[pymodule]
mod _locale {
use rustpython_vm::{
builtins::{PyDictRef, PyIntRef, PyListRef, PyStrRef, PyTypeRef},
function::OptionalArg,
PyObjectRef, PyResult, VirtualMachine,
};

#[pyattr]
use libc::{
ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6, ABDAY_7, ABMON_1, ABMON_10, ABMON_11,
ABMON_12, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9,
ALT_DIGITS, AM_STR, CODESET, CRNCYSTR, DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,
D_FMT, D_T_FMT, ERA, ERA_D_FMT, ERA_D_T_FMT, ERA_T_FMT, LC_ALL, LC_COLLATE, LC_CTYPE,
LC_MESSAGES, LC_MONETARY, LC_NUMERIC, LC_TIME, MON_1, MON_10, MON_11, MON_12, MON_2, MON_3,
MON_4, MON_5, MON_6, MON_7, MON_8, MON_9, NOEXPR, PM_STR, RADIXCHAR, THOUSEP, T_FMT,
T_FMT_AMPM, YESEXPR,
};
Comment on lines +18 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not all of these constants are available on every platform. When I encountered this sort of problem in the past, I looked up each item individually in the libc repository to see which platforms support that item. (See #3840, for example.)

I realized back then (and I still maintain) that this process would prove to be tedious and error-prone. Perhaps I should look into using the autocfg crate instead, but that's ultimately beyond the scope of this PR.

Copy link
Contributor Author
@minhrongcon2000 minhrongcon2000 Feb 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried with the Windows locale.h this afternoon and found that the Windows API uses a different naming convention (prefixed with W, if I remember correctly). Hence, if we want to support it, I'm afraid we will need to implement our own locale interaction. I will do that in another PR once this module takes shape.

For now, I have limited this module to Unix systems only!


use std::{
ffi::{CStr, CString},
ptr,
};

/// Module attribute `CHAR_MAX`: the largest value of the platform's C `char`
/// type, as reported by `libc::c_char::MAX`.
#[pyattr(name = "CHAR_MAX")]
fn char_max(vm: &VirtualMachine) -> PyIntRef {
    let limit = libc::c_char::MAX;
    vm.ctx.new_int(limit)
}

/// Converts a C grouping sequence (`lconv.grouping` / `lconv.mon_grouping`)
/// into a Python list of integers.
///
/// The sequence is read up to and including its terminator, which is either
/// `0` ("repeat the last group size") or `CHAR_MAX` ("no further grouping").
/// The terminator that was actually found is kept as the last list element so
/// callers can tell the two cases apart. A null pointer or an empty sequence
/// (first element `0`) yields an empty list.
///
/// # Safety
///
/// `group` must either be null or point to a readable sequence of `c_char`
/// values terminated by `0` or `CHAR_MAX`.
unsafe fn copy_grouping(group: *mut libc::c_char, vm: &VirtualMachine) -> PyListRef {
    let mut group_vec: Vec<PyObjectRef> = Vec::new();

    // SAFETY: `group` is only dereferenced after the null check, and the
    // caller guarantees that a non-null pointer has a readable first element.
    if group.is_null() || unsafe { *group } == 0 {
        // No grouping information at all.
        return vm.ctx.new_list(group_vec);
    }

    let mut ptr = group;
    loop {
        // SAFETY: `ptr` never moves past the terminator, so it stays inside
        // the sequence the caller vouched for.
        let size = unsafe { *ptr };
        group_vec.push(vm.ctx.new_int(size).into());

        // Compare against an untyped `0` so this builds whether `c_char` is
        // `i8` or `u8` on the target platform.
        if size == 0 || size == libc::c_char::MAX {
            break;
        }
        // SAFETY: the element just read was not a terminator, so the next
        // element still belongs to the sequence.
        ptr = unsafe { ptr.add(1) };
    }
    // https://github.com/python/cpython/blob/677320348728ce058fa3579017e985af74a236d4/Modules/_localemodule.c#L80
    vm.ctx.new_list(group_vec)
}

/// Builds a Python `str` object from a NUL-terminated C string.
///
/// Locale strings are not guaranteed to be UTF-8 (for example in ISO-8859-x
/// locales), so bytes that do not form valid UTF-8 are replaced with U+FFFD
/// instead of panicking the interpreter.
///
/// # Safety
///
/// `raw_ptr` must be non-null and point to a valid NUL-terminated string that
/// remains alive and unmodified for the duration of the call.
unsafe fn _parse_ptr_to_str(vm: &VirtualMachine, raw_ptr: *const libc::c_char) -> PyResult {
    // SAFETY: validity of `raw_ptr` is guaranteed by the caller (see above).
    let c_str = unsafe { CStr::from_ptr(raw_ptr) };
    let text = c_str.to_string_lossy();

    Ok(vm.new_pyobj(&*text))
}

/// Module attribute `Error`: an exception type named `Error` in module
/// `locale`, with the built-in `Exception` type as its only base.
#[pyattr(name = "Error", once)]
fn error(vm: &VirtualMachine) -> PyTypeRef {
    let bases = vec![vm.ctx.exceptions.exception_type.to_owned()];
    vm.ctx.new_exception_type("locale", "Error", Some(bases))
}

#[pyfunction]
fn localeconv(vm: &VirtualMachine) -> PyResult<PyDictRef> {
let result = vm.ctx.new_dict();

unsafe {
let lc = libc::localeconv();

macro_rules! set_string_field {
($field:ident) => {{
result.set_item(stringify!($field), _parse_ptr_to_str(vm, (*lc).$field)?, vm)?
}};
}

macro_rules! set_int_field {
($field:ident) => {{
result.set_item(stringify!($field), vm.new_pyobj((*lc).$field), vm)?
}};
}

macro_rules! set_group_field {
($field:ident) => {{
result.set_item(
stringify!($field),
copy_grouping((*lc).$field, vm).into(),
vm,
)?
}};
}

set_group_field!(mon_grouping);
set_group_field!(grouping);
set_int_field!(int_frac_digits);
set_int_field!(frac_digits);
set_int_field!(p_cs_precedes);
set_int_field!(p_sep_by_space);
set_int_field!(n_cs_precedes);
set_int_field!(p_sign_posn);
set_int_field!(n_sign_posn);
set_string_field!(decimal_point);
set_string_field!(thousands_sep);
set_string_field!(int_curr_symbol);
set_string_field!(currency_symbol);
set_string_field!(mon_decimal_point);
set_string_field!(mon_thousands_sep);
set_int_field!(n_sep_by_space);
set_string_field!(positive_sign);
set_string_field!(negative_sign);
}
Ok(result)
}

/// Arguments accepted by `setlocale`.
#[derive(FromArgs)]
struct LocaleArgs {
// Locale category; forwarded unchanged as the first argument of `libc::setlocale`.
#[pyarg(any)]
category: i32,
// Locale name. When the argument is omitted or `None`, `setlocale` passes a
// null pointer to `libc::setlocale` instead of a locale string.
#[pyarg(any, optional)]
locale: OptionalArg<Option<PyStrRef>>,
}

#[pyfunction]
fn setlocale(args: LocaleArgs, vm: &VirtualMachine) -> PyResult {
unsafe {
let result = match args.locale.flatten() {
None => libc::setlocale(args.category, ptr::null()),
Some(l) => {
let l_str = CString::new(l.to_string()).expect("expect to be always converted");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not always successful

>>> _locale.setlocale(0, '\0')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: embedded null character

let l_ptr = CStr::as_ptr(&l_str);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because CString implements std::ops::Deref<Target=CStr>, this is redundant.

libc::setlocale(args.category, lstr.as_ptr())

works exactly same way.

libc::setlocale(args.category, l_ptr)
}
};
if result.is_null() {
let error = error(vm);
return Err(vm.new_exception_msg(error, String::from("unsupported locale setting")));
}
_parse_ptr_to_str(vm, result)
}
}
}
0