Update feature/groupby_optimization from master by densmirn · Pull Request #843 · IntelPython/sdc · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Update feature/groupby_optimization from master #843

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ the code by leveraging modern hardware instructions and by utilizing all availab

Intel® SDC documentation can be found `here <https://intelpython.github.io/sdc-doc/>`_.

Intel® SDC uses special Numba build based on ``0.48.0`` tag for build and run.
Required Numba version can be installed from ``intel/label/beta`` channel from the Anaconda Cloud.

.. note::
For maximum performance and stability, please use numba from ``intel/label/beta`` channel.

Expand Down Expand Up @@ -61,9 +58,6 @@ If you do not have conda, we recommend using Miniconda3::
./miniconda.sh -b
export PATH=$HOME/miniconda3/bin:$PATH

Intel® SDC uses special Numba build based on ``0.48.0`` tag for build and run.
Required Numba version can be installed from ``intel/label/beta`` channel from the Anaconda Cloud.

.. note::
For maximum performance and stability, please use numba from ``intel/label/beta`` channel.

Expand All @@ -88,7 +82,7 @@ Building on Linux with setuptools

PYVER=<3.6 or 3.7>
NUMPYVER=<1.16 or 1.17>
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER numba=0.48.0 pandas=0.25.3 pyarrow=0.15.1 gcc_linux-64 gxx_linux-64
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER numba=0.49 pandas=0.25.3 pyarrow=0.17.0 gcc_linux-64 gxx_linux-64
source activate sdc-env
git clone https://github.com/IntelPython/sdc.git
cd sdc
Expand Down Expand Up @@ -126,7 +120,7 @@ Building on Windows with setuptools

set PYVER=<3.6 or 3.7>
set NUMPYVER=<1.16 or 1.17>
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% numba=0.48.0 pandas=0.25.3 pyarrow=0.15.1
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% numba=0.49 pandas=0.25.3 pyarrow=0.17.0
conda activate sdc-env
set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include
set LIB=%LIB%;%CONDA_PREFIX%\Library\lib
Expand Down
3 changes: 0 additions & 3 deletions buildscripts/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@

def build(sdc_utils):
os.chdir(str(sdc_utils.src_path))
# For Windows build do not use intel channel due to build issue
if platform.system() == 'Windows':
sdc_utils.channels = '-c intel/label/beta -c defaults -c conda-forge'

sdc_utils.log_info('Start Intel SDC build', separate=True)
conda_build_cmd = ' '.join([
Expand Down
3 changes: 2 additions & 1 deletion buildscripts/run_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def run_examples(sdc_utils):
sdc_utils.log_info('Run Intel(R) SDC examples', separate=True)
sdc_utils.log_info(sdc_utils.line_double)
sdc_utils.create_environment()
sdc_utils.install_conda_package(['sdc'])
sdc_package = f'sdc={sdc_utils.get_sdc_version_from_channel()}'
sdc_utils.install_conda_package([sdc_package])

run_examples(sdc_utils)
4 changes: 2 additions & 2 deletions buildscripts/sdc-conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% set NUMBA_VERSION = "==0.48" %}
{% set NUMBA_VERSION = "==0.49.1" %}
{% set PANDAS_VERSION = "==0.25.3" %}
{% set PYARROW_VERSION = "==0.15.1" %}
{% set PYARROW_VERSION = "==0.17.0" %}

package:
name: sdc
Expand Down
23 changes: 20 additions & 3 deletions buildscripts/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@



import json
import os
import platform
import re
Expand All @@ -51,7 +52,7 @@ def __init__(self, python, sdc_local_channel=None):
self.line_single = '-'*80

# Set channels
self.channel_list = ['-c', 'intel/label/beta', '-c', 'intel', '-c', 'defaults', '-c', 'conda-forge']
self.channel_list = ['-c', 'intel/label/beta', '-c', 'defaults', '-c', 'conda-forge']
if sdc_local_channel:
sdc_local_channel = Path(sdc_local_channel).resolve().as_uri()
self.channel_list = ['-c', sdc_local_channel] + self.channel_list
Expand Down Expand Up @@ -87,7 +88,7 @@ def create_environment(self, packages_list=[]):
# Create Intel SDC environment
create_args = ['-q', '-y', '-n', self.env_name, f'python={self.python}']
create_args += packages_list + self.channel_list + ['--override-channels']
self.__run_conda_command(Conda_Commands.CREATE, create_args)
self.log_info(self.__run_conda_command(Conda_Commands.CREATE, create_args))

return

Expand All @@ -97,7 +98,7 @@ def install_conda_package(self, packages_list):
self.log_info(f'Install {" ".join(packages_list)} to {self.env_name} conda environment')
install_args = ['-n', self.env_name]
install_args += self.channel_list + ['--override-channels', '-q', '-y'] + packages_list
self.__run_conda_command(Conda_Commands.INSTALL, install_args)
self.log_info(self.__run_conda_command(Conda_Commands.INSTALL, install_args))

return

Expand Down Expand Up @@ -135,3 +136,19 @@ def log_info(self, msg, separate=False):
if separate:
print(f'{time.strftime("%d/%m/%Y %H:%M:%S")}: {self.line_double}', flush=True)
print(f'{time.strftime("%d/%m/%Y %H:%M:%S")}: {msg}', flush=True)

def get_sdc_version_from_channel(self):
    """Return the ``<version>=<build>`` spec of the Intel SDC package for ``self.python``.

    Runs ``conda search sdc --json`` against the first channel in
    ``self.channel_list`` and picks the first package whose build string
    contains the Python tag derived from ``self.python`` (e.g. ``'3.7'`` ->
    ``'py37'``).

    Returns:
        str: ``'<version>=<build>'``, suitable for appending to ``'sdc='``.

    Raises:
        KeyError: if the search output has no ``'sdc'`` entry.
        RuntimeError: if no ``sdc`` build matches the requested Python version.
    """
    python_version = 'py' + self.python.replace('.', '')

    # channel_list is a flat ['-c', <channel>, '-c', <channel>, ...] list,
    # so index 1 is the name of the first (highest-priority) channel.
    first_channel = self.channel_list[1]
    search_args = ['sdc', '-c', first_channel, '--override-channels', '--json']
    # NOTE(review): assumes __run_conda_command returns the command's stdout as text — confirm.
    search_result = self.__run_conda_command(Conda_Commands.SEARCH, search_args)

    repo_data = json.loads(search_result)
    for package_data in repo_data['sdc']:
        sdc_build = package_data['build']
        if python_version in sdc_build:
            return f"{package_data['version']}={sdc_build}"

    # Previously the loop fell through and silently returned the last listed
    # build (for the wrong Python), or hit an unbound local on an empty list.
    raise RuntimeError(
        f'No sdc package built for {python_version} found in channel {first_channel}')
2 changes: 1 addition & 1 deletion docs/source/compilation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Output:

Traceback (most recent call last):
...
numba.errors.TypingError: Failed in nopython mode pipeline (step: nopython frontend)
numba.core.errors.TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Unknown attribute 'read_excel' of type Module(<module 'pandas' from ...)


Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
numpy>=1.16
pandas==0.25.3
pyarrow==0.15.1
numba==0.48
pyarrow==0.17.0
numba==0.49.1
6 changes: 4 additions & 2 deletions sdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import sdc.datatypes.hpat_pandas_stringmethods_functions
import sdc.datatypes.hpat_pandas_groupby_functions

import sdc.extensions.indexes.range_index_ext

from ._version import get_versions

if not sdc.config.config_pipeline_hpat_default:
Expand All @@ -57,8 +59,8 @@
"""

# sdc.config.numba_compiler_define_nopython_pipeline_orig = \
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline = \
# numba.core.compiler.DefaultPassBuilder.define_nopython_pipeline
# numba.core.compiler.DefaultPassBuilder.define_nopython_pipeline = \
# sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register

import sdc.rewrites.dataframe_constructor
Expand Down
16 changes: 8 additions & 8 deletions sdc/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@
from sdc.hiframes.hiframes_typed import HiFramesTypedPass
from sdc.hiframes.dataframe_pass import DataFramePass
import numba
import numba.compiler
from numba.compiler import DefaultPassBuilder
import numba.core.compiler
from numba.core.compiler import DefaultPassBuilder
from numba import ir_utils, ir, postproc
from numba.targets.registry import CPUDispatcher
from numba.ir_utils import guard, get_definition
from numba.inline_closurecall import inline_closure_call
from numba.core.registry import CPUDispatcher
from numba.core.ir_utils import guard, get_definition
from numba.core.inline_closurecall import inline_closure_call
from numba.typed_passes import (NopythonTypeInference, AnnotateTypes, ParforPass, IRLegalization)
from numba.untyped_passes import (DeadBranchPrune, InlineInlinables, InlineClosureLikes)
from sdc import config
from sdc.distributed import DistributedPass

from numba.compiler_machinery import FunctionPass, register_pass
from numba.core.compiler_machinery import FunctionPass, register_pass

# workaround for Numba #3876 issue with large labels in mortgage benchmark
binding.set_option("tmp", "-non-global-value-max-name-size=2048")
Expand Down Expand Up @@ -143,7 +143,7 @@ def run_pass(self, state):
return True


class SDCPipeline(numba.compiler.CompilerBase):
class SDCPipeline(numba.core.compiler.CompilerBase):
"""SDC compiler pipeline
"""

Expand All @@ -170,7 +170,7 @@ def __init__(self):
pass

def run_pass(self, state):
numba.parfor.lower_parfor_sequential(
numba.parfors.parfor.lower_parfor_sequential(
state.typingctx, state.func_ir, state.typemap, state.calltypes)

return True
Expand Down
8 changes: 4 additions & 4 deletions sdc/cv_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
import numba
import sdc
from numba import types
from numba.typing.templates import infer_global, AbstractTemplate, infer, signature
from numba.core.typing.templates import infer_global, AbstractTemplate, infer, signature
from numba.extending import lower_builtin, overload, intrinsic
from numba import cgutils
from numba.core import cgutils
from sdc.str_ext import string_type
from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed
from numba.targets.arrayobj import _empty_nd_impl
from numba.core.imputils import impl_ret_new_ref, impl_ret_borrowed
from numba.np.arrayobj import _empty_nd_impl

import cv2
import numpy as np
Expand Down
3 changes: 0 additions & 3 deletions sdc/datatypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,3 @@
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************


# import sdc.datatypes.hpat_pandas_dataframe_pass
6 changes: 3 additions & 3 deletions sdc/datatypes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
from pandas.core.indexing import IndexingError

import numba
from numba.targets import quicksort
from numba.misc import quicksort
from numba import types
from numba.errors import TypingError
from numba.core.errors import TypingError
from numba.extending import register_jitable
from numba import numpy_support
from numba.np import numpy_support
from numba.typed import Dict

import sdc
Expand Down
6 changes: 3 additions & 3 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@

from pandas.core.indexing import IndexingError

from numba import types, prange
from numba.special import literally
from numba import types
from numba import literally
from numba.typed import List, Dict
from numba.errors import TypingError
from numba.core.errors import TypingError
from pandas.core.indexing import IndexingError

from sdc.hiframes.pd_dataframe_ext import DataFrameType
Expand Down
7 changes: 4 additions & 3 deletions sdc/datatypes/hpat_pandas_dataframe_getitem_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@

import pandas

from numba import types, cgutils
from numba import types
from numba.core import cgutils
from numba.extending import models, overload, register_model, make_attribute_wrapper, intrinsic
from numba.datamodel import register_default, StructModel
from numba.typing.templates import signature
from numba.core.datamodel import register_default, StructModel
from numba.core.typing.templates import signature


class DataFrameGetitemAccessorType(types.Type):
Expand Down
Loading
0