performance improvements · pydata/xarray@1fce68b · GitHub
[go: up one dir, main page]

Skip to content

Commit 1fce68b

Browse files
committed
performance improvements
1 parent a1210b8 commit 1fce68b

File tree

6 files changed

+91
-47
lines changed

6 files changed

+91
-47
lines changed

xarray/core/alignment.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ def align(*objects, **kwargs):
7575
raise TypeError('align() got unexpected keyword arguments: %s'
7676
% list(kwargs))
7777

78+
if not indexes and len(objects) == 1:
79+
# fast path for the trivial case
80+
obj, = objects
81+
return (obj.copy(deep=copy),)
82+
7883
all_indexes = defaultdict(list)
7984
for obj in objects:
8085
for dim, index in iteritems(obj.indexes):
@@ -102,7 +107,12 @@ def align(*objects, **kwargs):
102107
for obj in objects:
103108
valid_indexers = dict((k, v) for k, v in joined_indexes.items()
104109
if k in obj.dims)
105-
result.append(obj.reindex(copy=copy, **valid_indexers))
110+
if not valid_indexers:
111+
# fast path for no reindexing necessary
112+
new_obj = obj.copy(deep=copy)
113+
else:
114+
new_obj = obj.reindex(copy=copy, **valid_indexers)
115+
result.append(new_obj)
106116

107117
return tuple(result)
108118

xarray/core/computation.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from .merge import _align_for_merge as deep_align
1212
from .merge import merge_coords_without_align
1313
from .utils import is_dict_like
14-
from .pycompat import dask_array_type, OrderedDict, basestring
14+
from .pycompat import dask_array_type, OrderedDict, basestring, suppress
1515

1616

1717
SLICE_NONE = slice(None)
@@ -138,22 +138,41 @@ def _default_result_attrs(attrs, func, signature):
138138

139139

140140
def build_output_coords(args, signature, new_coords=None):
141+
coord_variables = []
142+
for arg in args:
143+
try:
144+
coords = arg.coords
145+
except AttributeError:
146+
pass # skip this argument
147+
else:
148+
coord_vars = getattr(coords, 'variables', coords)
149+
coord_variables.append(coord_vars)
141150

142-
coord_variables = [getattr(getattr(arg, 'coords', {}), 'variables', {})
143-
for arg in args]
144151
if new_coords is not None:
145152
coord_variables.append(getattr(new_coords, 'variables', new_coords))
146153

147-
merged = merge_coords_without_align(coord_variables)
154+
if len(args) == 1 and new_coords is None:
155+
# we can skip the expensive merge
156+
merged, = coord_variables
157+
else:
158+
merged = merge_coords_without_align(coord_variables)
159+
160+
missing_dims = signature.all_output_core_dims - set(merged)
161+
if missing_dims:
162+
raise ValueError('new output dimensions must have matching entries in '
163+
'`new_coords`: %r' % missing_dims)
148164

149-
output = []
165+
output_coords = []
150166
for output_dims in signature.output_core_dims:
151167
dropped_dims = signature.all_input_core_dims - set(output_dims)
152-
coords = OrderedDict((k, v) for k, v in merged.items()
153-
if set(v.dims).isdisjoint(dropped_dims))
154-
output.append(coords)
168+
if dropped_dims:
169+
coords = OrderedDict((k, v) for k, v in merged.items()
170+
if set(v.dims).isdisjoint(dropped_dims))
171+
else:
172+
coords = merged
173+
output_coords.append(coords)
155174

156-
return output
175+
return output_coords
157176

158177

159178
def apply_dataarray_ufunc(func, *args, **kwargs):
@@ -172,7 +191,8 @@ def apply_dataarray_ufunc(func, *args, **kwargs):
172191
if signature is None:
173192
signature = _default_signature(len(args))
174193

175-
args = deep_align(args, join=join, copy=False, raise_on_invalid=False)
194+
if len(args) > 1:
195+
args = deep_align(args, join=join, copy=False, raise_on_invalid=False)
176196

177197
name = result_name(args)
178198
result_coords = build_output_coords(args, signature, new_coords)
@@ -181,16 +201,23 @@ def apply_dataarray_ufunc(func, *args, **kwargs):
181201
result_var = func(*data_vars)
182202

183203
if signature.n_outputs > 1:
184-
return tuple(DataArray(variable, coords, name=name)
204+
return tuple(DataArray(variable, coords, name=name, fastpath=True)
185205
for variable, coords in zip(result_var, result_coords))
186206
else:
187207
coords, = result_coords
188-
return DataArray(result_var, coords, name=name)
208+
return DataArray(result_var, coords, name=name, fastpath=True)
189209

190210

191211
def join_dict_keys(objects, how='inner'):
212+
all_keys = [obj.keys() for obj in objects if hasattr(obj, 'keys')]
213+
214+
if len(all_keys) == 1:
215+
# shortcut
216+
result_keys, = all_keys
217+
return result_keys
218+
192219
joiner = _get_joiner(how)
193-
all_keys = (obj.keys() for obj in objects if hasattr(obj, 'keys'))
220+
# TODO: use a faster ordered set than a pandas.Index
194221
result_keys = joiner([pd.Index(keys) for keys in all_keys])
195222
return result_keys
196223

@@ -203,6 +230,17 @@ def collect_dict_values(objects, keys, fill_value=None):
203230
for key in keys]
204231

205232

233+
def _fast_dataset(variables, coord_variables):
234+
"""Create a dataset as quickly as possible.
235+
236+
Variables are modified *inplace*.
237+
"""
238+
from .dataset import Dataset
239+
variables.update(coord_variables)
240+
coord_names = set(coord_variables)
241+
return Dataset._from_vars_and_coord_names(variables, coord_names)
242+
243+
206244
def apply_dataset_ufunc(func, *args, **kwargs):
207245
"""
208246
def apply_dataset_ufunc(func, args, signature=None, join='inner',
@@ -221,7 +259,8 @@ def apply_dataset_ufunc(func, args, signature=None, join='inner',
221259
if signature is None:
222260
signature = _default_signature(len(args))
223261

224-
args = deep_align(args, join=join, copy=False, raise_on_invalid=False)
262+
if len(args) > 1:
263+
args = deep_align(args, join=join, copy=False, raise_on_invalid=False)
225264

226265
list_of_coords = build_output_coords(args, signature, new_coords)
227266

@@ -243,12 +282,11 @@ def apply_dataset_ufunc(func, args, signature=None, join='inner',
243282
for value, results_dict in zip(values, result_dict_list):
244283
results_dict[name] = value
245284

246-
return tuple(Dataset(*args)
285+
return tuple(_fast_dataset(*args)
247286
for args in zip(result_dict_list, list_of_coords))
248287
else:
249-
data_vars = result_vars
250288
coord_vars, = list_of_coords
251-
return Dataset(data_vars, coord_vars)
289+
return _fast_dataset(result_vars, coord_vars)
252290

253291

254292
def _iter_over_selections(obj, dim, values):

xarray/core/dataset.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
from .merge import (dataset_update_method, dataset_merge_method,
2121
merge_data_and_coords)
2222
from .utils import Frozen, SortedKeysDict, maybe_wrap_array, hashable
23-
from .variable import (Variable, as_variable, IndexVariable, broadcast_variables)
23+
from .variable import (Variable, as_variable, IndexVariable,
24+
broadcast_variables, default_index_coordinate)
2425
from .pycompat import (iteritems, basestring, OrderedDict,
2526
dask_array_type)
2627
from .combine import concat
@@ -87,9 +88,12 @@ def calculate_dimensions(variables):
8788
return dims
8889

8990

90-
def _assert_empty(args, msg='%s'):
91-
if args:
92-
raise ValueError(msg % args)
91+
def add_default_dim_coords_inplace(variables, dims):
92+
# type: (MutableMapping[object, Variable], Mapping[object, int]) -> None
93+
"""Add missing coordinates to variables inplace."""
94+
for dim, size in iteritems(dims):
95+
if dim not in variables:
96+
variables[dim] = default_index_coordinate(dim, size)
9397

9498

9599
def as_dataset(obj):
@@ -206,13 +210,6 @@ def __init__(self, data_vars=None, coords=None, attrs=None,
206210
self.attrs = attrs
207211
self._initialized = True
208212

209-
def _add_missing_coords_inplace(self):
210-
"""Add missing coordinates to self._variables
211-
"""
212-
for dim, size in iteritems(self.dims):
213-
if dim not in self._variables:
214-
self._variables[dim] = default_index_coordinate(dim, size)
215-
216213
def _set_init_vars_and_dims(self, data_vars, coords, compat):
217214
"""Set the initial value of Dataset variables and dimensions
218215
"""
@@ -678,9 +675,11 @@ def reset_coords(self, names=None, drop=False, inplace=False):
678675
if isinstance(names, basestring):
679676
names = [names]
680677
self._assert_all_in_dataset(names)
681-
_assert_empty(
682-
set(names) & set(self.dims),
683-
'cannot remove index coordinates with reset_coords: %s')
678+
bad_coords = set(names) & set(self.dims)
679+
if bad_coords:
680+
raise ValueError(
681+
'cannot remove index coordinates with reset_coords: %s'
682+
% bad_coords)
684683
obj = self if inplace else self.copy()
685684
obj._coord_names.difference_update(names)
686685
if drop:
@@ -1695,8 +1694,10 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
16951694
else:
16961695
dims = set(dim)
16971696

1698-
_assert_empty([dim for dim in dims if dim not in self.dims],
1699-
'Dataset does not contain the dimensions: %s')
1697+
missing_dimensions = [dim for dim in dims if dim not in self.dims]
1698+
if missing_dimensions:
1699+
raise ValueError('Dataset does not contain the dimensions: %s'
1700+
% missing_dimensions)
17001701

17011702
variables = OrderedDict()
17021703
for name, var in iteritems(self._variables):

xarray/core/merge.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from .alignment import align
44
from .utils import Frozen, is_dict_like
5-
from .variable import as_variable, default_index_coordinate
5+
from .variable import as_variable
66
from .pycompat import (basestring, OrderedDict)
77

88

@@ -423,7 +423,7 @@ def merge_core(objs, compat='broadcast_equals', join='outer', priority_arg=None,
423423
------
424424
MergeError if the merge cannot be done successfully.
425425
"""
426-
from .dataset import calculate_dimensions
426+
from .dataset import calculate_dimensions, add_default_dim_coords_inplace
427427

428428
_assert_compat_valid(compat)
429429

@@ -440,10 +440,7 @@ def merge_core(objs, compat='broadcast_equals', join='outer', priority_arg=None,
440440
variables = merge_variables(expanded, priority_vars, compat=compat)
441441

442442
dims = calculate_dimensions(variables)
443-
444-
for dim, size in dims.items():
445-
if dim not in variables:
446-
variables[dim] = default_index_coordinate(dim, size)
443+
add_default_dim_coords_inplace(variables, dims)
447444

448445
coord_names.update(dims)
449446

xarray/core/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def remove_incompatible_items(first_dict, second_dict, compat=equivalent):
155155

156156

157157
def is_dict_like(value):
158-
return hasattr(value, '__getitem__') and hasattr(value, 'keys')
158+
return hasattr(value, 'keys') and hasattr(value, '__getitem__')
159159

160160

161161
def is_full_slice(value):

xarray/test/test_computation.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,11 @@ def stack2(obj):
217217
# no new_coords
218218
return xr.apply_ufunc(func, obj, signature=sig)
219219

220-
actual = stack2(data_array)
221-
expected_data_array.coords['sign'] = [0, 1]
222-
assert_identical(actual, expected_data_array)
220+
with pytest.raises(ValueError):
221+
stack2(data_array)
223222

224-
actual = stack2(dataset)
225-
expected_dataset.coords['sign'] = [0, 1]
226-
assert_identical(actual, expected_dataset)
223+
with pytest.raises(ValueError):
224+
stack2(dataset)
227225

228226

229227
def test_broadcast_compat_data_1d():

0 commit comments

Comments
 (0)
0