IntelPython · kozlov-alexey · May 11, 2021 · May 5, 2021 · May 5, 2021
diff --git a/sdc/datatypes/categorical/functions.py b/sdc/datatypes/categorical/functions.py
@@ -24,9 +24,11 @@
 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
-from sdc.utilities.utils import sdc_overload_attribute
+from sdc.utilities.utils import sdc_overload_attribute, sdc_overload
+from numba.extending import intrinsic
+from numba import types
 
-from .types import CategoricalDtypeType
+from .types import CategoricalDtypeType, Categorical
 
 
 @sdc_overload_attribute(CategoricalDtypeType, 'ordered')
@@ -36,3 +38,29 @@ def pd_CategoricalDtype_categories_overload(self):
     def impl(self):
         return ordered
     return impl
+
+
+@intrinsic
+def _categorical_len(tyctx, arr_type):
+    ret_type = types.intp
+
+    def codegen(context, builder, sig, args):
+        arr_val, = args
+        arr_info = context.make_helper(builder, arr_type, arr_val)
+        res = builder.load(arr_info._get_ptr_by_name('nitems'))
+        return res
+
+    return ret_type(arr_type), codegen
+
+
+@sdc_overload(len)
+def pd_Categorical_len_overload(self):
+    if not isinstance(self, Categorical):
+        return None
+
+    # Categorical uses ArrayModel and doesn't expose be_type members,
+    # hence we use an intrinsic to access those fields. TO-DO: refactor
+    def impl(self):
+        return _categorical_len(self)
+
+    return impl
diff --git a/sdc/datatypes/series/boxing.py b/sdc/datatypes/series/boxing.py
@@ -25,9 +25,13 @@
 # *****************************************************************************
 
 from numba.core.imputils import lower_constant
-from numba.core import cgutils
+from numba.core import cgutils, types
 
 from .types import SeriesType
+from sdc.datatypes.indexes.positional_index_type import PositionalIndexType
+from sdc.hiframes.boxing import _unbox_index_data
+from sdc.extensions.indexes.range_index_ext import unbox_range_index
+from sdc.datatypes.indexes.range_index_type import RangeIndexDataType
 
 
 @lower_constant(SeriesType)
@@ -39,7 +43,21 @@ def constant_Series(context, builder, ty, pyval):
     """
     series = cgutils.create_struct_proxy(ty)(context, builder)
     series.data = _constant_Series_data(context, builder, ty, pyval)
-    # TODO: index and name
+
+    # TODO: index and name (this only handles PositionalIndexType(False)
+    # and duplicates the unboxing logic; refactor to support all indexes)
+    native_range_index = cgutils.create_struct_proxy(RangeIndexDataType)(context, builder)
+    native_range_index.start = context.get_constant(types.int64, pyval.index.start)
+    native_range_index.stop = context.get_constant(types.int64, pyval.index.stop)
+    native_range_index.step = context.get_constant(types.int64, pyval.index.step)
+
+    range_index = cgutils.create_struct_proxy(ty.index.data)(context, builder)
+    range_index.data = native_range_index._getvalue()
+
+    positional_index = cgutils.create_struct_proxy(PositionalIndexType(False))(context, builder)
+    positional_index.data = range_index._getvalue()
+
+    series.index = positional_index._getvalue()
     return series._getvalue()
 
 
@@ -52,7 +70,11 @@ def _constant_Series_data(context, builder, ty, pyval):
 
     from ..categorical.types import CategoricalDtypeType
 
-    if isinstance(ty.dtype, CategoricalDtypeType):
+    # TO-DO: this requires lower_constant to be implemented for other types
+    # like indexes and so on; until then raise NotImplementedError
+    if (isinstance(ty.dtype, CategoricalDtypeType)
+            and ty.index is PositionalIndexType(False)
+            and ty.is_named is False):
         from ..categorical.boxing import constant_Categorical
         return constant_Categorical(context, builder, ty.data, pyval.array)
 

diff --git a/sdc/tests/categorical/test_df_category.py b/sdc/tests/categorical/test_df_category.py
@@ -38,6 +38,7 @@
 
 from sdc.hiframes.pd_dataframe_type import DataFrameType
 from sdc.tests.test_utils import skip_numba_jit
+from sdc.datatypes.indexes.positional_index_type import PositionalIndexType
 
 
 class DFCategoryTest(TestCase):
@@ -54,7 +55,7 @@ def test_typeof(self):
 
         assert(isinstance(nb_type, DataFrameType))
         assert(nb_type.columns == ('A',))
-        assert(nb_type.index == types.none)
+        assert(nb_type.index == PositionalIndexType(False))
        assert(nb_type.data[0].pd_dtype == CategoricalDtypeType(categories=[1, 2, 3], ordered=False))
         assert(nb_type.data[0] == Categorical(CategoricalDtypeType(categories=[1, 2, 3], ordered=False)))
 

diff --git a/sdc/tests/categorical/test_series_category.py b/sdc/tests/categorical/test_series_category.py
@@ -27,6 +27,7 @@
 from sdc.tests.test_base import TestCase
 
 import pandas as pd
+import numpy as np
 import numba as nb
 from numba import types
 
@@ -35,6 +36,7 @@
     CategoricalDtypeType,
     Categorical,
 )
+from sdc.datatypes.indexes.positional_index_type import PositionalIndexType
 
 
 class SeriesCategoryTest(TestCase):
@@ -51,7 +53,7 @@ def test_typeof(self):
 
         assert(isinstance(nb_type, SeriesType))
         assert(nb_type.dtype == CategoricalDtypeType(categories=[1, 2, 3], ordered=False))
-        assert(nb_type.index == types.none)
+        assert(nb_type.index == PositionalIndexType(False))
         assert(nb_type.data == Categorical(CategoricalDtypeType(categories=[1, 2, 3], ordered=False)))
 
     def test_unboxing(self):

diff --git a/sdc/tests/indexes/test_int64_index.py b/sdc/tests/indexes/test_int64_index.py
@@ -230,11 +230,9 @@ def test_impl(index, deep):
                 result = sdc_func(index, deep)
                 result_ref = test_impl(index, deep)
                 pd.testing.assert_index_equal(result, result_ref)
-                # pandas uses ndarray views when copies index, so for python
-                # case check that data arrays share the same memory
                 self.assertEqual(
                     result._data is index._data,
-                    result_ref._data.base is index._data
+                    result_ref._data is index._data
                 )
 
     def test_int64_index_getitem_scalar(self):

diff --git a/sdc/tests/test_series_ops.py b/sdc/tests/test_series_ops.py
@@ -1062,7 +1062,8 @@ def test_impl(S1, S2, value):
                 result_ref = test_impl(S1, scalar, fill_value)
                 pd.testing.assert_series_equal(result, result_ref)
 
-    @unittest.expectedFailure  # Numba issue with 1/0 is different (inf) than in Numpy (nan)
+    # See SAT-4111 for more details
+    @skip_numba_jit("numpy + mkl_umath 1.0 // 0 gives nan, not inf as stock numpy>=1.20")
     def test_series_binop_floordiv_numeric(self):
         def test_impl(a, b, value):
             return a.floordiv(b, fill_value=value)

diff --git a/sdc/utilities/sdc_typing_utils.py b/sdc/utilities/sdc_typing_utils.py
@@ -40,6 +40,7 @@
 from sdc.str_arr_type import string_array_type
 from sdc.datatypes.indexes import *
 from sdc.str_arr_ext import StringArrayType
+from sdc.datatypes.categorical.types import Categorical
 
 
 sdc_old_index_types = (types.Array, StringArrayType, )
@@ -65,6 +66,7 @@
 sdc_pandas_df_column_types = (
         types.Array,
         StringArrayType,
+        Categorical,
     )
 
 class TypeChecker: