Revert "Merge from master 2021_w8 (#962)" · IntelPython/sdc@696bf34 · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 696bf34

Browse files
Revert "Merge from master 2021_w8 (#962)"
This reverts commit 5ce3841.
1 parent 5ce3841 commit 696bf34

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+449
-2021
lines changed

README.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ Distribution includes Intel® SDC for Python 3.6 and Python 3.7 for Windows and
3434

3535
Intel® SDC conda package can be installed using the steps below::
3636

37-
> conda create -n sdc-env python=<3.7 or 3.6> pyarrow=0.17.0 pandas=1.2.0 -c anaconda -c conda-forge
37+
> conda create -n sdc-env python=<3.7 or 3.6> pyarrow=0.17.0 pandas=1.0.5 -c anaconda -c conda-forge
3838
> conda activate sdc-env
3939
> conda install sdc -c intel/label/beta -c intel -c defaults -c conda-forge --override-channels
4040

4141
Intel® SDC wheel package can be installed using the steps below::
4242

43-
> conda create -n sdc-env python=<3.7 or 3.6> pip pyarrow=0.17.0 pandas=1.2.0 -c anaconda -c conda-forge
43+
> conda create -n sdc-env python=<3.7 or 3.6> pip pyarrow=0.17.0 pandas=1.0.5 -c anaconda -c conda-forge
4444
> conda activate sdc-env
4545
> pip install --index-url https://pypi.anaconda.org/intel/label/beta/simple --extra-index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple sdc
4646

@@ -82,7 +82,7 @@ Building on Linux with setuptools
8282

8383
export PYVER=<3.6 or 3.7>
8484
export NUMPYVER=<1.16 or 1.17>
85-
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=0.17.0 gcc_linux-64 gxx_linux-64
85+
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.49 pandas=1.0.5 pyarrow=0.17.0 gcc_linux-64 gxx_linux-64
8686
source activate sdc-env
8787
git clone https://github.com/IntelPython/sdc.git
8888
cd sdc
@@ -120,7 +120,7 @@ Building on Windows with setuptools
120120

121121
set PYVER=<3.6 or 3.7>
122122
set NUMPYVER=<1.16 or 1.17>
123-
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=0.17.0
123+
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.49 pandas=1.0.5 pyarrow=0.17.0
124124
conda activate sdc-env
125125
set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include
126126
set LIB=%LIB%;%CONDA_PREFIX%\Library\lib

buildscripts/utilities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def __init__(self, python, sdc_local_channel=None):
5252
self.line_single = '-'*80
5353

5454
# Set channels
55-
self.channel_list = ['-c', 'defaults', '-c', 'conda-forge']
55+
self.channel_list = ['-c', 'intel/label/beta', '-c', 'defaults', '-c', 'conda-forge']
5656
if sdc_local_channel:
5757
sdc_local_channel = Path(sdc_local_channel).resolve().as_uri()
5858
self.channel_list = ['-c', sdc_local_channel] + self.channel_list

conda-recipe/meta.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
{% set NUMBA_VERSION = "==0.52.0" %}
2-
{% set PANDAS_VERSION = "==1.2.0" %}
1+
{% set NUMBA_VERSION = "==0.51.2" %}
2+
{% set PANDAS_VERSION = "==1.0.5" %}
33
{% set PYARROW_VERSION = "==0.17.0" %}
44

55
package:

docs/source/_templates/_api_ref.pandas.window_templ.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ Exponentially-weighted moving window functions
5151
----------------------------------------------
5252

5353
.. sdc_toctree
54-
ewm.ExponentialMovingWindow.mean
55-
ewm.ExponentialMovingWindow.std
56-
ewm.ExponentialMovingWindow.var
57-
ewm.ExponentialMovingWindow.corr
58-
ewm.ExponentialMovingWindow.cov
54+
EWM.mean
55+
EWM.std
56+
EWM.var
57+
EWM.corr
58+
EWM.cov

docs/source/getting_started.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ Distribution includes Intel SDC for Python 3.6 and 3.7 for Windows and Linux pla
4141
Intel SDC conda package can be installed using the steps below:
4242
::
4343

44-
> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=0.17.0 pandas=1.2.0 -c anaconda -c conda-forge
44+
> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=0.17.0 pandas=0.25.3 -c anaconda -c conda-forge
4545
> conda activate sdc_env
4646
> conda install sdc -c intel/label/beta -c intel -c defaults -c conda-forge --override-channels
4747

4848
Intel SDC wheel package can be installed using the steps below:
4949
::
5050

51-
> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=0.17.0 pandas=1.2.0 -c anaconda -c conda-forge
51+
> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=0.17.0 pandas=0.25.3 -c anaconda -c conda-forge
5252
> conda activate sdc_env
5353
> pip install --index-url https://pypi.anaconda.org/intel/label/beta/simple --extra-index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple sdc
5454

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
numpy>=1.16
2-
pandas==1.2.0
2+
pandas==0.25.3
33
pyarrow==0.17.0
4-
numba==0.52.0
4+
numba==0.51.2
55
tbb
66
tbb-devel

sdc/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
# re-export from Numba
3030
from numba import (typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
31-
stencil, threading_layer, objmode)
31+
stencil, threading_layer, jitclass, objmode)
3232

3333
import sdc.config
3434
import sdc.set_ext
@@ -48,7 +48,6 @@
4848
import sdc.datatypes.series.init
4949

5050
import sdc.extensions.indexes.range_index_ext
51-
import sdc.extensions.indexes.int64_index_ext
5251

5352
from ._version import get_versions
5453

sdc/_str_ext.cpp

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
#include <string>
3232
#include <vector>
3333
#include <cmath>
34-
#include <algorithm>
3534

3635
#include "_str_decode.cpp"
3736

@@ -130,7 +129,6 @@ extern "C"
130129
npy_intp array_size(PyArrayObject* arr);
131130
void* array_getptr1(PyArrayObject* arr, npy_intp ind);
132131
void array_setitem(PyArrayObject* arr, char* p, PyObject* s);
133-
void stable_argsort(char* data_ptr, uint32_t* in_offsets, int64_t len, int8_t ascending, uint64_t* result);
134132

135133
PyMODINIT_FUNC PyInit_hstr_ext(void)
136134
{
@@ -203,7 +201,6 @@ extern "C"
203201
PyObject_SetAttrString(m, "array_setitem", PyLong_FromVoidPtr((void*)(&array_setitem)));
204202
PyObject_SetAttrString(m, "decode_utf8", PyLong_FromVoidPtr((void*)(&decode_utf8)));
205203
PyObject_SetAttrString(m, "get_utf8_size", PyLong_FromVoidPtr((void*)(&get_utf8_size)));
206-
PyObject_SetAttrString(m, "stable_argsort", PyLong_FromVoidPtr((void*)(&stable_argsort)));
207204
return m;
208205
}
209206

@@ -874,35 +871,4 @@ extern "C"
874871
return;
875872
}
876873

877-
void stable_argsort(char* data_ptr, uint32_t* in_offsets, int64_t len, int8_t ascending, uint64_t* result)
878-
{
879-
using str_index_pair_type = std::pair<std::string, int64_t>;
880-
std::vector<str_index_pair_type> str_arr_indexed;
881-
str_arr_indexed.reserve(len);
882-
883-
for (int64_t i=0; i < len; ++i)
884-
{
885-
uint32_t start = in_offsets[i];
886-
uint32_t size = in_offsets[i + 1] - in_offsets[i];
887-
str_arr_indexed.emplace_back(
888-
std::move(std::string(&data_ptr[start], size)),
889-
i
890-
);
891-
}
892-
893-
std::stable_sort(str_arr_indexed.begin(),
894-
str_arr_indexed.end(),
895-
[=](const str_index_pair_type& left, const str_index_pair_type& right){
896-
if (ascending)
897-
return left.first < right.first;
898-
else
899-
return left.first > right.first;
900-
}
901-
);
902-
903-
for (int64_t i=0; i < len; ++i)
904-
result[i] = str_arr_indexed[i].second;
905-
}
906-
907-
908874
} // extern "C"

sdc/datatypes/common_functions.py

Lines changed: 46 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,14 @@
4848
from sdc.functions import numpy_like
4949
from sdc.str_arr_type import string_array_type, StringArrayType
5050
from sdc.datatypes.range_index_type import RangeIndexType
51-
from sdc.datatypes.int64_index_type import Int64IndexType
5251
from sdc.str_arr_ext import (num_total_chars, append_string_array_to,
5352
str_arr_is_na, pre_alloc_string_array, str_arr_set_na, string_array_type,
5453
cp_str_list_to_array, create_str_arr_from_list, get_utf8_size,
55-
str_arr_set_na_by_mask, str_arr_stable_argosort)
54+
str_arr_set_na_by_mask)
5655
from sdc.utilities.prange_utils import parallel_chunks
5756
from sdc.utilities.utils import sdc_overload, sdc_register_jitable
58-
from sdc.utilities.sdc_typing_utils import (
59-
find_common_dtype_from_numpy_dtypes,
60-
TypeChecker)
61-
from sdc.utilities.sdc_typing_utils import sdc_pandas_index_types
57+
from sdc.utilities.sdc_typing_utils import (find_common_dtype_from_numpy_dtypes,
58+
TypeChecker)
6259

6360

6461
class SDCLimitation(Exception):
@@ -74,20 +71,18 @@ def hpat_arrays_append(A, B):
7471
def hpat_arrays_append_overload(A, B):
7572
"""Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""
7673

77-
use_A_array = isinstance(A, (RangeIndexType, Int64IndexType))
78-
use_B_array = isinstance(B, (RangeIndexType, Int64IndexType))
79-
if isinstance(A, (types.Array, RangeIndexType, Int64IndexType)):
80-
if isinstance(B, (types.Array, RangeIndexType, Int64IndexType)):
74+
A_is_range_index = isinstance(A, RangeIndexType)
75+
B_is_range_index = isinstance(B, RangeIndexType)
76+
if isinstance(A, (types.Array, RangeIndexType)):
77+
if isinstance(B, (types.Array, RangeIndexType)):
8178
def _append_single_numeric_impl(A, B):
82-
_A = A.values if use_A_array == True else A # noqa
83-
_B = B.values if use_B_array == True else B # noqa
79+
_A = A.values if A_is_range_index == True else A # noqa
80+
_B = B.values if B_is_range_index == True else B # noqa
8481
return numpy.concatenate((_A, _B,))
8582

8683
return _append_single_numeric_impl
87-
88-
elif (isinstance(B, (types.UniTuple, types.List))
89-
and isinstance(B.dtype, (types.Array, RangeIndexType, Int64IndexType))):
90-
B_dtype_is_index = isinstance(B.dtype, (RangeIndexType, Int64IndexType))
84+
elif isinstance(B, (types.UniTuple, types.List)) and isinstance(B.dtype, (types.Array, RangeIndexType)):
85+
B_dtype_is_range_index = isinstance(B.dtype, RangeIndexType)
9186
numba_common_dtype = find_common_dtype_from_numpy_dtypes([A.dtype, B.dtype.dtype], [])
9287

9388
# TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
@@ -97,10 +92,10 @@ def _append_list_numeric_impl(A, B):
9792
new_data = numpy.empty(total_length, numba_common_dtype)
9893

9994
stop = len(A)
100-
_A = numpy.array(A) if use_A_array == True else A # noqa
95+
_A = numpy.array(A) if A_is_range_index == True else A # noqa
10196
new_data[:stop] = _A
10297
for arr in B:
103-
_arr = arr.values if B_dtype_is_index == True else arr # noqa
98+
_arr = numpy.array(arr) if B_dtype_is_range_index == True else arr # noqa
10499
start = stop
105100
stop = start + len(_arr)
106101
new_data[start:stop] = _arr
@@ -223,13 +218,12 @@ def sdc_join_series_indexes_overload(left, right):
223218
"""Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm"""
224219

225220
# check that both operands are of types used for representing Pandas indexes
226-
if not (isinstance(left, sdc_pandas_index_types) and isinstance(right, sdc_pandas_index_types)
227-
and not isinstance(left, types.NoneType)
228-
and not isinstance(right, types.NoneType)):
221+
if not (isinstance(left, (types.Array, StringArrayType, RangeIndexType))
222+
and isinstance(right, (types.Array, StringArrayType, RangeIndexType))):
229223
return None
230224

231-
convert_left = isinstance(left, (RangeIndexType, Int64IndexType))
232-
convert_right = isinstance(right, (RangeIndexType, Int64IndexType))
225+
convert_left = isinstance(left, RangeIndexType)
226+
convert_right = isinstance(right, RangeIndexType)
233227

234228
def _convert_to_arrays_impl(left, right):
235229
_left = left.values if convert_left == True else left # noqa
@@ -249,9 +243,10 @@ def sdc_join_range_indexes_impl(left, right):
249243

250244
return sdc_join_range_indexes_impl
251245

252-
elif (isinstance(left, (RangeIndexType, Int64IndexType, types.Array))
253-
and isinstance(right, (RangeIndexType, Int64IndexType, types.Array))
254-
and not (isinstance(left, types.Array) and isinstance(right, types.Array))):
246+
elif isinstance(left, RangeIndexType) and isinstance(right, types.Array):
247+
return _convert_to_arrays_impl
248+
249+
elif isinstance(left, types.Array) and isinstance(right, RangeIndexType):
255250
return _convert_to_arrays_impl
256251

257252
# TODO: remove code duplication below and merge numeric and StringArray impls into one
@@ -518,39 +513,41 @@ def sdc_arrays_argsort(A, kind='quicksort'):
518513

519514

520515
@sdc_overload(sdc_arrays_argsort, jit_options={'parallel': False})
521-
def sdc_arrays_argsort_overload(A, kind='quicksort', ascending=True):
516+
def sdc_arrays_argsort_overload(A, kind='quicksort'):
522517
"""Function providing pandas argsort implementation for different 1D array types"""
523518

524519
# kind is not known at compile time, so get this function here and use in impl if needed
525520
quicksort_func = quicksort.make_jit_quicksort().run_quicksort
526521

527522
kind_is_default = isinstance(kind, str)
528523
if isinstance(A, types.Array):
529-
def _sdc_arrays_argsort_array_impl(A, kind='quicksort', ascending=True):
524+
def _sdc_arrays_argsort_array_impl(A, kind='quicksort'):
530525
_kind = 'quicksort' if kind_is_default == True else kind # noqa
531-
return numpy_like.argsort(A, kind=_kind, ascending=ascending)
526+
return numpy_like.argsort(A, kind=_kind)
532527

533528
return _sdc_arrays_argsort_array_impl
534529

535530
elif A == string_array_type:
536-
def _sdc_arrays_argsort_str_arr_impl(A, kind='quicksort', ascending=True):
531+
def _sdc_arrays_argsort_str_arr_impl(A, kind='quicksort'):
537532

533+
nan_mask = sdc.hiframes.api.get_nan_mask(A)
534+
idx = numpy.arange(len(A))
535+
old_nan_positions = idx[nan_mask]
536+
537+
data = A[~nan_mask]
538+
keys = idx[~nan_mask]
538539
if kind == 'quicksort':
539-
indexes = numpy.arange(len(A))
540-
data_index_pairs = list(zip(list(A), list(indexes)))
541-
zipped = quicksort_func(data_index_pairs)
542-
argsorted = [zipped[i][1] for i in indexes]
543-
res = numpy.array(argsorted, dtype=numpy.int64)
544-
# for non-stable sort the order within groups does not matter
545-
# so just reverse the result when sorting in descending order
546-
if not ascending:
547-
res = res[::-1]
540+
zipped = list(zip(list(data), list(keys)))
541+
zipped = quicksort_func(zipped)
542+
argsorted = [zipped[i][1] for i in numpy.arange(len(data))]
548543
elif kind == 'mergesort':
549-
res = str_arr_stable_argosort(A, ascending=ascending)
544+
sdc.hiframes.sort.local_sort((data, ), (keys, ))
545+
argsorted = list(keys)
550546
else:
551547
raise ValueError("Unrecognized kind of sort in sdc_arrays_argsort")
552548

553-
return res
549+
argsorted.extend(old_nan_positions)
550+
return numpy.asarray(argsorted, dtype=numpy.int32)
554551

555552
return _sdc_arrays_argsort_str_arr_impl
556553

@@ -621,16 +618,13 @@ def _sdc_take(data, indexes):
621618
@sdc_overload(_sdc_take)
622619
def _sdc_take_overload(data, indexes):
623620

624-
valid_data_types = (types.Array,) + sdc_pandas_index_types
625-
if not (isinstance(data, valid_data_types) and not isinstance(data, types.NoneType)):
621+
if not isinstance(data, (types.Array, StringArrayType, RangeIndexType)):
626622
return None
627-
628-
if not (isinstance(indexes, (types.Array, types.List, Int64IndexType))
623+
if not (isinstance(indexes, (types.Array, types.List))
629624
and isinstance(indexes.dtype, (types.Integer, types.ListType))):
630625
return None
631626

632-
if (isinstance(indexes.dtype, types.ListType)
633-
and isinstance(data, (types.Array, types.List, RangeIndexType, Int64IndexType))):
627+
if isinstance(indexes.dtype, types.ListType) and isinstance(data, (types.Array, types.List, RangeIndexType)):
634628
arr_dtype = data.dtype
635629

636630
def _sdc_take_list_impl(data, indexes):
@@ -683,7 +677,7 @@ def _sdc_take_list_str_impl(data, indexes):
683677

684678
return _sdc_take_list_str_impl
685679

686-
elif isinstance(data, (types.Array, RangeIndexType, Int64IndexType)):
680+
elif isinstance(data, (types.Array, RangeIndexType)):
687681
arr_dtype = data.dtype
688682

689683
def _sdc_take_array_impl(data, indexes):
@@ -746,7 +740,6 @@ def sdc_reindex_series_overload(arr, index, name, by_index):
746740
""" Reindexes series data by new index following the logic of pandas.core.indexing.check_bool_indexer """
747741

748742
range_indexes = isinstance(index, RangeIndexType) and isinstance(by_index, RangeIndexType)
749-
int64_indexes = isinstance(index, Int64IndexType) and isinstance(by_index, Int64IndexType)
750743
data_dtype, index_dtype = arr.dtype, index.dtype
751744
data_is_str_arr = isinstance(arr.dtype, types.UnicodeType)
752745

@@ -755,8 +748,6 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
755748
# no reindexing is needed if indexes are equal
756749
if range_indexes == True: # noqa
757750
equal_indexes = numpy_like.array_equal(index, by_index)
758-
elif int64_indexes == True: # noqa
759-
equal_indexes = numpy_like.array_equal(index, by_index)
760751
else:
761752
equal_indexes = False
762753
if (index is by_index or equal_indexes):
@@ -781,10 +772,10 @@ def sdc_reindex_series_impl(arr, index, name, by_index):
781772
map_index_to_position[value] = i
782773

783774
index_mismatch = 0
784-
for i in numba.prange(len(by_index)):
785-
val = by_index[i]
786-
if val in map_index_to_position:
787-
pos_in_self = map_index_to_position[val]
775+
# FIXME: TypingError in parfor step (wrong promotion to float64?) if prange is used
776+
for i in numpy.arange(len(by_index)):
777+
if by_index[i] in map_index_to_position:
778+
pos_in_self = map_index_to_position[by_index[i]]
788779
_res_data[i] = arr[pos_in_self]
789780
if data_is_str_arr == True: # noqa
790781
res_data_nan_mask[i] = isna(arr, i)

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
gen_impl_generator, find_common_dtype_from_numpy_dtypes)
5151
from sdc.str_arr_ext import StringArrayType
5252
from sdc.datatypes.range_index_type import RangeIndexType
53-
from sdc.datatypes.int64_index_type import Int64IndexType
5453

5554
from sdc.hiframes.pd_dataframe_type import DataFrameType
5655
from sdc.hiframes.pd_dataframe_ext import init_dataframe_internal, get_structure_maps
@@ -2258,7 +2257,7 @@ def sdc_pandas_dataframe_accessor_getitem(self, idx):
22582257

22592258
if accessor == 'at':
22602259
num_idx = (isinstance(idx[0], types.Number)
2261-
and isinstance(self.dataframe.index, (types.NoneType, RangeIndexType, Int64IndexType)))
2260+
and isinstance(self.dataframe.index, (types.Array, types.NoneType, RangeIndexType)))
22622261
str_idx = (isinstance(idx[0], (types.UnicodeType, types.StringLiteral))
22632262
and isinstance(self.dataframe.index, StringArrayType))
22642263
if isinstance(idx, types.Tuple) and isinstance(idx[1], types.StringLiteral):

0 commit comments

Comments
 (0)
0