From 4991be389a7cf08c92464b87b9dc653b372c03e3 Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Wed, 5 May 2021 05:24:27 +0300 Subject: [PATCH 1/2] Fixes pack of failed tests --- sdc/datatypes/categorical/functions.py | 32 +++++++++++++++++-- sdc/datatypes/series/boxing.py | 28 ++++++++++++++-- sdc/tests/categorical/test_df_category.py | 3 +- sdc/tests/categorical/test_series_category.py | 4 ++- sdc/tests/indexes/test_int64_index.py | 4 +-- sdc/tests/test_series_ops.py | 1 - sdc/utilities/sdc_typing_utils.py | 2 ++ 7 files changed, 63 insertions(+), 11 deletions(-) diff --git a/sdc/datatypes/categorical/functions.py b/sdc/datatypes/categorical/functions.py index af166b085..060e7da9a 100644 --- a/sdc/datatypes/categorical/functions.py +++ b/sdc/datatypes/categorical/functions.py @@ -24,9 +24,11 @@ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -from sdc.utilities.utils import sdc_overload_attribute +from sdc.utilities.utils import sdc_overload_attribute, sdc_overload +from numba.extending import intrinsic +from numba import types -from .types import CategoricalDtypeType +from .types import CategoricalDtypeType, Categorical @sdc_overload_attribute(CategoricalDtypeType, 'ordered') @@ -36,3 +38,29 @@ def pd_CategoricalDtype_categories_overload(self): def impl(self): return ordered return impl + + +@intrinsic +def _categorical_len(tyctx, arr_type): + ret_type = types.intp + + def codegen(context, builder, sig, args): + arr_val, = args + arr_info = context.make_helper(builder, arr_type, arr_val) + res = builder.load(arr_info._get_ptr_by_name('nitems')) + return res + + return ret_type(arr_type), codegen + + +@sdc_overload(len) +def pd_Categorical_len_overload(self): + if not isinstance(self, Categorical): + return None + + # Categorical use ArrayModel and don't expose be_type members + # hence we use intrinsic to access those fields. TO-DO: refactor + def impl(self): + return _categorical_len(self) + + return impl diff --git a/sdc/datatypes/series/boxing.py b/sdc/datatypes/series/boxing.py index 51e41d05e..95becac12 100644 --- a/sdc/datatypes/series/boxing.py +++ b/sdc/datatypes/series/boxing.py @@ -25,9 +25,13 @@ # ***************************************************************************** from numba.core.imputils import lower_constant -from numba.core import cgutils +from numba.core import cgutils, types from .types import SeriesType +from sdc.datatypes.indexes.positional_index_type import PositionalIndexType +from sdc.hiframes.boxing import _unbox_index_data +from sdc.extensions.indexes.range_index_ext import unbox_range_index +from sdc.datatypes.indexes.range_index_type import RangeIndexDataType @lower_constant(SeriesType) @@ -39,7 +43,21 @@ def constant_Series(context, builder, ty, pyval): """ series = cgutils.create_struct_proxy(ty)(context, builder) series.data = _constant_Series_data(context, builder, ty, pyval) - # TODO: index and name + + # TODO: index and name (this only handles PositionalIndexType(False) + # and repeats unboxing, need to refactor to support all indexes) + native_range_index = cgutils.create_struct_proxy(RangeIndexDataType)(context, builder) + native_range_index.start = context.get_constant(types.int64, pyval.index.start) + native_range_index.stop = context.get_constant(types.int64, pyval.index.stop) + native_range_index.step = context.get_constant(types.int64, pyval.index.step) + + range_index = cgutils.create_struct_proxy(ty.index.data)(context, builder) + range_index.data = native_range_index._getvalue() + + positional_index = cgutils.create_struct_proxy(PositionalIndexType(False))(context, builder) + positional_index.data = range_index._getvalue() + + series.index = positional_index._getvalue() return series._getvalue() @@ -52,7 +70,11 @@ def _constant_Series_data(context, builder, ty, pyval): from ..categorical.types import CategoricalDtypeType - if isinstance(ty.dtype, CategoricalDtypeType): + # TO-DO: this requires lower_constant to be implemented for other types + # like indices and so on, until that raise NotImplementedError + if (isinstance(ty.dtype, CategoricalDtypeType) + and ty.index is PositionalIndexType(False) + and ty.is_named is False): from ..categorical.boxing import constant_Categorical return constant_Categorical(context, builder, ty.data, pyval.array) diff --git a/sdc/tests/categorical/test_df_category.py b/sdc/tests/categorical/test_df_category.py index 1d47a3924..b3af22da7 100644 --- a/sdc/tests/categorical/test_df_category.py +++ b/sdc/tests/categorical/test_df_category.py @@ -38,6 +38,7 @@ from sdc.hiframes.pd_dataframe_type import DataFrameType from sdc.tests.test_utils import skip_numba_jit +from sdc.datatypes.indexes.positional_index_type import PositionalIndexType class DFCategoryTest(TestCase): @@ -54,7 +55,7 @@ def test_typeof(self): assert(isinstance(nb_type, DataFrameType)) assert(nb_type.columns == ('A',)) - assert(nb_type.index == types.none) + assert(nb_type.index == PositionalIndexType(False)) assert(nb_type.data[0].pd_dtype == CategoricalDtypeType(categories=[1, 2, 3], ordered=False)) assert(nb_type.data[0] == Categorical(CategoricalDtypeType(categories=[1, 2, 3], ordered=False))) diff --git a/sdc/tests/categorical/test_series_category.py b/sdc/tests/categorical/test_series_category.py index 9e6d69643..39385de9f 100644 --- a/sdc/tests/categorical/test_series_category.py +++ b/sdc/tests/categorical/test_series_category.py @@ -27,6 +27,7 @@ from sdc.tests.test_base import TestCase import pandas as pd +import numpy as np import numba as nb from numba import types @@ -35,6 +36,7 @@ CategoricalDtypeType, Categorical, ) +from sdc.datatypes.indexes.positional_index_type import PositionalIndexType class SeriesCategoryTest(TestCase): @@ -51,7 +53,7 @@ def test_typeof(self): assert(isinstance(nb_type, SeriesType)) assert(nb_type.dtype == CategoricalDtypeType(categories=[1, 2, 3], ordered=False)) - assert(nb_type.index == types.none) + assert(nb_type.index == PositionalIndexType(False)) assert(nb_type.data == Categorical(CategoricalDtypeType(categories=[1, 2, 3], ordered=False))) def test_unboxing(self): diff --git a/sdc/tests/indexes/test_int64_index.py b/sdc/tests/indexes/test_int64_index.py index 552e01f7b..27f518919 100644 --- a/sdc/tests/indexes/test_int64_index.py +++ b/sdc/tests/indexes/test_int64_index.py @@ -230,11 +230,9 @@ def test_impl(index, deep): result = sdc_func(index, deep) result_ref = test_impl(index, deep) pd.testing.assert_index_equal(result, result_ref) - # pandas uses ndarray views when copies index, so for python - # case check that data arrays share the same memory self.assertEqual( result._data is index._data, - result_ref._data.base is index._data + result_ref._data is index._data ) def test_int64_index_getitem_scalar(self): diff --git a/sdc/tests/test_series_ops.py b/sdc/tests/test_series_ops.py index 5dbda0d42..eccfb1d40 100644 --- a/sdc/tests/test_series_ops.py +++ b/sdc/tests/test_series_ops.py @@ -1062,7 +1062,6 @@ def test_impl(S1, S2, value): result_ref = test_impl(S1, scalar, fill_value) pd.testing.assert_series_equal(result, result_ref) - @unittest.expectedFailure # Numba issue with 1/0 is different (inf) than in Numpy (nan) def test_series_binop_floordiv_numeric(self): def test_impl(a, b, value): return a.floordiv(b, fill_value=value) diff --git a/sdc/utilities/sdc_typing_utils.py b/sdc/utilities/sdc_typing_utils.py index b1b6a8ebf..3c5c4219e 100644 --- a/sdc/utilities/sdc_typing_utils.py +++ b/sdc/utilities/sdc_typing_utils.py @@ -40,6 +40,7 @@ from sdc.str_arr_type import string_array_type from sdc.datatypes.indexes import * from sdc.str_arr_ext import StringArrayType +from sdc.datatypes.categorical.types import Categorical sdc_old_index_types = (types.Array, StringArrayType, ) @@ -65,6 +66,7 @@ sdc_pandas_df_column_types = ( types.Array, StringArrayType, + Categorical, ) class TypeChecker: From 59e8a0b24b7e6c70240fe245d323e8209e843f1f Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Wed, 5 May 2021 16:49:36 +0300 Subject: [PATCH 2/2] Skipping floordiv test due to diff behavior in Stock vs Intel numpy --- sdc/tests/test_series_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdc/tests/test_series_ops.py b/sdc/tests/test_series_ops.py index eccfb1d40..b8469b7b5 100644 --- a/sdc/tests/test_series_ops.py +++ b/sdc/tests/test_series_ops.py @@ -1062,6 +1062,8 @@ def test_impl(S1, S2, value): result_ref = test_impl(S1, scalar, fill_value) pd.testing.assert_series_equal(result, result_ref) + # See SAT-4111 for more details + @skip_numba_jit("numpy + mkl_umath 1.0 // 0 gives nan, not inf as stock numpy>=1.20") def test_series_binop_floordiv_numeric(self): def test_impl(a, b, value): return a.floordiv(b, fill_value=value)