Refactored GroupBy ASVs by WillAyd · Pull Request #20043 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Refactored GroupBy ASVs #20043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 10, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added ASVs for transformation
  • Loading branch information
WillAyd committed Mar 8, 2018
commit 1917e60e8a96fccead61157478c1a9e6ce9ff485
27 changes: 19 additions & 8 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ class AggFunctions(object):

goal_time = 0.2

def setup_cache(self):
def setup_cache():
N = 10**5
fac1 = np.array(['A', 'B', 'C'], dtype='O')
fac2 = np.array(['one', 'two'], dtype='O')
Expand Down Expand Up @@ -338,15 +338,16 @@ class GroupByMethods(object):

goal_time = 0.2

param_names = ['dtype', 'method']
param_names = ['dtype', 'method', 'application']
params = [['int', 'float', 'object', 'datetime'],
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
'std', 'sum', 'tail', 'unique', 'value_counts', 'var'],
['direct', 'transformation']]

def setup(self, dtype, method):
def setup(self, dtype, method, application):
if method in method_blacklist.get(dtype, {}):
raise NotImplementedError # skip benchmark
ngroups = 1000
Expand All @@ -364,13 +365,23 @@ def setup(self, dtype, method):
key = date_range('1/1/2011', periods=size, freq='s')

df = DataFrame({'values': values, 'key': key})
self.as_group_method = getattr(df.groupby('key')['values'], method)
self.as_field_method = getattr(df.groupby('values')['key'], method)

def time_dtype_as_group(self, dtype, method):
if application == 'transform':
if method == 'describe':
raise NotImplementedError

self.as_group_method = lambda: df.groupby(
'key')['values'].transform(method)
self.as_field_method = lambda: df.groupby(
'values')['key'].transform(method)
else:
self.as_group_method = getattr(df.groupby('key')['values'], method)
self.as_field_method = getattr(df.groupby('values')['key'], method)

def time_dtype_as_group(self, dtype, method, application):
self.as_group_method()

def time_dtype_as_field(self, dtype, method):
def time_dtype_as_field(self, dtype, method, application):
self.as_field_method()


Expand Down
0