[MRG] Joblib 0.9.4 by ogrisel · Pull Request #6179 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG] Joblib 0.9.4 #6179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,25 @@ API changes summary
- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
Use ``loss`` instead. By `Manoj Kumar`_.


.. _changes_0_17_1:

Version 0.17.1
==============

Changelog
---------

Bug fixes
.........


- Upgrade vendored joblib to version 0.9.4 that fixes an important bug in
``joblib.Parallel`` that can silently lead to wrong results when working
on datasets larger than 1MB:
https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst


.. _changes_0_17:

Version 0.17
Expand Down
6 changes: 3 additions & 3 deletions sklearn/externals/joblib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
>>> c = square(a)
>>> # The above call did not trigger an evaluation

2) **Embarrassingly parallel helper:** to make is easy to write readable
2) **Embarrassingly parallel helper:** to make it easy to write readable
parallel code and debug it quickly::

>>> from sklearn.externals.joblib import Parallel, delayed
Expand All @@ -86,7 +86,7 @@
3) **Logging/tracing:** The different functionalities will
progressively acquire better logging mechanism to help track what
has been ran, and capture I/O easily. In addition, Joblib will
provide a few I/O primitives, to easily define define logging and
provide a few I/O primitives, to easily define logging and
display streams, and provide a way of compiling a report.
We want to be able to quickly inspect what has been run.

Expand Down Expand Up @@ -115,7 +115,7 @@
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
__version__ = '0.9.3'
__version__ = '0.9.4'


from .memory import Memory, MemorizedResult
Expand Down
5 changes: 4 additions & 1 deletion sklearn/externals/joblib/_compat.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""
Compatibility layer for Python 3/Python 2 single codebase
"""
import sys

PY3_OR_LATER = sys.version_info[0] >= 3

try:
_basestring = basestring
_bytes_or_unicode = (str, unicode)
except NameError:
_basestring = str
_bytes_or_unicode = (bytes, str)
_bytes_or_unicode = (bytes, str)
27 changes: 11 additions & 16 deletions sklearn/externals/joblib/format_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,14 @@
import time
import tokenize
import traceback
import types

try: # Python 2
generate_tokens = tokenize.generate_tokens
except AttributeError: # Python 3
generate_tokens = tokenize.tokenize

PY3 = (sys.version[0] == '3')
INDENT = ' ' * 8

from ._compat import _basestring

###############################################################################
# some internal-use functions
Expand Down Expand Up @@ -195,14 +193,13 @@ def format_records(records): # , print_globals=False):
# the abspath call will throw an OSError. Just ignore it and
# keep the original file string.
pass

if file.endswith('.pyc'):
file = file[:-4] + '.py'

link = file
try:
args, varargs, varkw, locals = inspect.getargvalues(frame)
except:
# This can happen due to a bug in python2.3. We should be
# able to remove this try/except when 2.4 becomes a
# requirement. Bug details at http://python.org/sf/1005466
print("\nJoblib's exception reporting continues...\n")

args, varargs, varkw, locals = inspect.getargvalues(frame)

if func == '?':
call = ''
Expand Down Expand Up @@ -350,13 +347,11 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0):
date = time.ctime(time.time())
pid = 'PID: %i' % os.getpid()

head = '%s%s%s\n%s%s%s' % (etype, ' ' * (75 - len(str(etype)) - len(date)),
date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
pyver)
head = '%s%s%s\n%s%s%s' % (
etype, ' ' * (75 - len(str(etype)) - len(date)),
date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
pyver)

# Flush cache before calling inspect. This helps alleviate some of the
# problems with python 2.3's inspect.py.
linecache.checkcache()
# Drop topmost frames if requested
try:
records = _fixed_getframes(etb, context, tb_offset)
Expand Down
116 changes: 84 additions & 32 deletions sklearn/externals/joblib/func_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from ._compat import _basestring
from .logger import pformat
from ._memory_helpers import open_py_source
from ._compat import PY3_OR_LATER


def get_func_code(func):
""" Attempts to retrieve a reliable function code hash.
Expand Down Expand Up @@ -156,6 +158,53 @@ def get_func_name(func, resolv_alias=True, win_characters=True):
return module, name


def getfullargspec(func):
    """Compatibility function to provide inspect.getfullargspec in Python 2

    This should be rewritten using a backport of Python 3 signature
    once we drop support for Python 2.6. We went for a simpler
    approach at the time of writing because signature uses OrderedDict
    which is not available in Python 2.6.
    """
    try:
        # Python 3: delegate directly to the standard library.
        return inspect.getfullargspec(func)
    except AttributeError:
        pass
    # Python 2 fallback: emulate the FullArgSpec named tuple on top of
    # the result of the legacy inspect.getargspec.
    import collections
    FullArgSpec = collections.namedtuple(
        'FullArgSpec',
        'args varargs varkw defaults kwonlyargs kwonlydefaults annotations')
    legacy_spec = inspect.getargspec(func)
    return FullArgSpec(args=legacy_spec.args,
                       varargs=legacy_spec.varargs,
                       varkw=legacy_spec.keywords,
                       defaults=legacy_spec.defaults,
                       kwonlyargs=[],
                       kwonlydefaults=None,
                       annotations={})


def _signature_str(function_name, arg_spec):
    """Return ``function_name`` followed by its formatted signature.

    ``arg_spec`` is a FullArgSpec-like named tuple (see ``getfullargspec``).
    The signature string is built by hand because ``inspect.formatargspec``
    was deprecated in Python 3.5 and removed in Python 3.11, so calling it
    crashes on modern interpreters.  The output mirrors the historical
    ``formatargspec`` rendering, e.g. ``f(a, b=2, *args, c, **kwargs)``.
    """
    annotations = getattr(arg_spec, 'annotations', None) or {}

    def _fmt_annotation(ann):
        # Mirror inspect.formatannotation: class objects render by name,
        # everything else by repr.
        return ann.__qualname__ if isinstance(ann, type) else repr(ann)

    def _fmt_arg(name):
        # Annotated parameters render as 'name: annotation'.
        if name in annotations:
            return '%s: %s' % (name, _fmt_annotation(annotations[name]))
        return name

    defaults = arg_spec.defaults or ()
    # Defaults align with the tail of the positional parameter list.
    first_default = len(arg_spec.args) - len(defaults)
    specs = []
    for i, arg in enumerate(arg_spec.args):
        spec = _fmt_arg(arg)
        if i >= first_default:
            spec += '=%r' % (defaults[i - first_default],)
        specs.append(spec)

    kwonlyargs = getattr(arg_spec, 'kwonlyargs', None) or []
    if arg_spec.varargs is not None:
        specs.append('*' + _fmt_arg(arg_spec.varargs))
    elif kwonlyargs:
        # Keyword-only parameters without *args need a bare '*' separator.
        specs.append('*')
    kwonlydefaults = getattr(arg_spec, 'kwonlydefaults', None) or {}
    for arg in kwonlyargs:
        spec = _fmt_arg(arg)
        if arg in kwonlydefaults:
            spec += '=%r' % (kwonlydefaults[arg],)
        specs.append(spec)

    # Support both FullArgSpec ('varkw') and legacy ArgSpec ('keywords').
    varkw = getattr(arg_spec, 'varkw', None)
    if varkw is None:
        varkw = getattr(arg_spec, 'keywords', None)
    if varkw is not None:
        specs.append('**' + _fmt_arg(varkw))

    result = '{0}({1})'.format(function_name, ', '.join(specs))
    if 'return' in annotations:
        result += ' -> %s' % _fmt_annotation(annotations['return'])
    return result


def _function_called_str(function_name, args, kwargs):
    """Helper function to output a function call"""
    # Render positional arguments by stripping the parentheses from the
    # tuple repr (note: a 1-tuple keeps its trailing comma, e.g. '1,').
    formatted_args = repr(args)[1:-1]
    formatted_kwargs = ', '.join('%s=%s' % item
                                 for item in kwargs.items())
    return '{0}({1}, {2})'.format(function_name,
                                  formatted_args,
                                  formatted_kwargs)


def filter_args(func, ignore_lst, args=(), kwargs=dict()):
""" Filters the given args and kwargs using a list of arguments to
ignore, and a function specification.
Expand All @@ -180,24 +229,23 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
args = list(args)
if isinstance(ignore_lst, _basestring):
# Catch a common mistake
raise ValueError('ignore_lst must be a list of parameters to ignore '
raise ValueError(
'ignore_lst must be a list of parameters to ignore '
'%s (type %s) was given' % (ignore_lst, type(ignore_lst)))
# Special case for functools.partial objects
if (not inspect.ismethod(func) and not inspect.isfunction(func)):
if ignore_lst:
warnings.warn('Cannot inspect object %s, ignore list will '
'not work.' % func, stacklevel=2)
'not work.' % func, stacklevel=2)
return {'*': args, '**': kwargs}
arg_spec = inspect.getargspec(func)
# We need to if/them to account for different versions of Python
if hasattr(arg_spec, 'args'):
arg_names = arg_spec.args
arg_defaults = arg_spec.defaults
arg_keywords = arg_spec.keywords
arg_varargs = arg_spec.varargs
else:
arg_names, arg_varargs, arg_keywords, arg_defaults = arg_spec
arg_defaults = arg_defaults or {}
arg_spec = getfullargspec(func)
arg_names = arg_spec.args + arg_spec.kwonlyargs
arg_defaults = arg_spec.defaults or ()
arg_defaults = arg_defaults + tuple(arg_spec.kwonlydefaults[k]
for k in arg_spec.kwonlyargs)
arg_varargs = arg_spec.varargs
arg_varkw = arg_spec.varkw

if inspect.ismethod(func):
# First argument is 'self', it has been removed by Python
# we need to add it back:
Expand All @@ -211,7 +259,18 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
for arg_position, arg_name in enumerate(arg_names):
if arg_position < len(args):
# Positional argument or keyword argument given as positional
arg_dict[arg_name] = args[arg_position]
if arg_name not in arg_spec.kwonlyargs:
arg_dict[arg_name] = args[arg_position]
else:
raise ValueError(
"Keyword-only parameter '%s' was passed as "
'positional parameter for %s:\n'
' %s was called.'
% (arg_name,
_signature_str(name, arg_spec),
_function_called_str(name, args, kwargs))
)

else:
position = arg_position - len(arg_names)
if arg_name in kwargs:
Expand All @@ -221,28 +280,24 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
arg_dict[arg_name] = arg_defaults[position]
except (IndexError, KeyError):
# Missing argument
raise ValueError('Wrong number of arguments for %s%s:\n'
' %s(%s, %s) was called.'
% (name,
inspect.formatargspec(*inspect.getargspec(func)),
name,
repr(args)[1:-1],
', '.join('%s=%s' % (k, v)
for k, v in kwargs.items())
)
)
raise ValueError(
'Wrong number of arguments for %s:\n'
' %s was called.'
% (_signature_str(name, arg_spec),
_function_called_str(name, args, kwargs))
)

varkwargs = dict()
for arg_name, arg_value in sorted(kwargs.items()):
if arg_name in arg_dict:
arg_dict[arg_name] = arg_value
elif arg_keywords is not None:
elif arg_varkw is not None:
varkwargs[arg_name] = arg_value
else:
raise TypeError("Ignore list for %s() contains an unexpected "
"keyword argument '%s'" % (name, arg_name))

if arg_keywords is not None:
if arg_varkw is not None:
arg_dict['**'] = varkwargs
if arg_varargs is not None:
varargs = args[arg_position + 1:]
Expand All @@ -254,13 +309,10 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
arg_dict.pop(item)
else:
raise ValueError("Ignore list: argument '%s' is not defined for "
"function %s%s" %
(item, name,
inspect.formatargspec(arg_names,
arg_varargs,
arg_keywords,
arg_defaults,
)))
"function %s"
% (item,
_signature_str(name, arg_spec))
)
# XXX: Return a sorted list of pairs?
return arg_dict

Expand Down
41 changes: 30 additions & 11 deletions sklearn/externals/joblib/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,17 @@
# Copyright (c) 2009 Gael Varoquaux
# License: BSD Style, 3 clauses.

import warnings
import pickle
import hashlib
import sys
import types
import struct
from ._compat import _bytes_or_unicode

import io

PY3 = sys.version[0] == '3'
from ._compat import _bytes_or_unicode, PY3_OR_LATER


if PY3:
if PY3_OR_LATER:
Pickler = pickle._Pickler
else:
Pickler = pickle.Pickler
class _ConsistentSet(object):
    """ Class used to ensure the hash of Sets is preserved
        whatever the order of its items.
    """
    def __init__(self, set_sequence):
        # Forces order of elements in set to ensure consistent hash.
        try:
            # Trying first to order the set assuming the type of elements is
            # consistent and orderable.
            # This fails on python 3 when elements are unorderable
            # but we keep it in a try as it's faster.
            self._sequence = sorted(set_sequence)
        except TypeError:
            # If elements are unorderable, sorting them using their hash.
            # This is slower but works in any case.
            self._sequence = sorted(hash(e) for e in set_sequence)


class _MyHash(object):
Expand All @@ -49,7 +57,7 @@ def __init__(self, hash_name='md5'):
self.stream = io.BytesIO()
# By default we want a pickle protocol that only changes with
# the major python version and not the minor one
protocol = (pickle.DEFAULT_PROTOCOL if PY3
protocol = (pickle.DEFAULT_PROTOCOL if PY3_OR_LATER
else pickle.HIGHEST_PROTOCOL)
Pickler.__init__(self, self.stream, protocol=protocol)
# Initialise the hash obj
Expand All @@ -59,7 +67,8 @@ def hash(self, obj, return_digest=True):
try:
self.dump(obj)
except pickle.PicklingError as e:
warnings.warn('PicklingError while hashing %r: %r' % (obj, e))
e.args += ('PicklingError while hashing %r: %r' % (obj, e),)
raise
dumps = self.stream.getvalue()
self._hash.update(dumps)
if return_digest:
Expand Down Expand Up @@ -128,8 +137,18 @@ def save_global(self, obj, name=None, pack=struct.pack):
dispatch[type(pickle.dump)] = save_global

def _batch_setitems(self, items):
    # Forces order of keys in dict to ensure consistent hash.
    try:
        # Trying first to compare dict assuming the type of keys is
        # consistent and orderable.
        # This fails on python 3 when keys are unorderable
        # but we keep it in a try as it's faster.
        Pickler._batch_setitems(self, iter(sorted(items)))
    except TypeError:
        # If keys are unorderable, sorting them using their hash. This is
        # slower but works in any case.
        Pickler._batch_setitems(self, iter(sorted((hash(k), v)
                                                  for k, v in items)))

def save_set(self, set_items):
# forces order of items in Set to ensure consistent hash
Expand Down Expand Up @@ -182,7 +201,7 @@ def save(self, obj):
# the array is Fortran rather than C contiguous
except (ValueError, BufferError):
# Cater for non-single-segment arrays: this creates a
# copy, and thus alleviates this issue.
# copy, and thus aleviates this issue.
# XXX: There might be a more efficient way of doing this
obj_bytes_view = obj.flatten().view(self.np.uint8)
self._hash.update(self._getbuffer(obj_bytes_view))
Expand Down
Loading
0