diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 7c7890cd6e..95517ead35 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -487,9 +487,9 @@ def isalpha_op_impl(x: ibis_types.Value):
 
 @scalar_op_compiler.register_unary_op(ops.isdigit_op)
 def isdigit_op_impl(x: ibis_types.Value):
-    # Based on docs, should include superscript/subscript-ed numbers
-    # Tests however pass only when set to Nd unicode class
-    return typing.cast(ibis_types.StringValue, x).re_search(r"^(\p{Nd})+$")
+    return typing.cast(ibis_types.StringValue, x).re_search(
+        r"^[\p{Nd}\x{00B9}\x{00B2}\x{00B3}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}]+$"
+    )
 
 
 @scalar_op_compiler.register_unary_op(ops.isdecimal_op)
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 209bc87f9b..a720614892 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -324,13 +324,10 @@ def test_isalpha(weird_strings, weird_strings_pd):
     )
 
 
-@pytest.mark.skipif(
-    "dev" in pa.__version__,
-    # b/333484335 pyarrow is inconsistent on the behavior
-    reason="pyarrow dev version is inconsistent on isdigit behavior.",
-)
 def test_isdigit(weird_strings, weird_strings_pd):
-    pd_result = weird_strings_pd.str.isdigit()
+    # check the behavior against normal pandas str, since pyarrow has a bug with superscripts/fractions b/333484335
+    # astype object instead of str to support pd.NA
+    pd_result = weird_strings_pd.astype(object).str.isdigit()
     bf_result = weird_strings.str.isdigit().to_pandas()
 
     pd.testing.assert_series_equal(