ENH: Improve support for pathlib.Path objects in load functions by paulmueller · Pull Request #11348 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Improve support for pathlib.Path objects in load functions #11348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 1, 2018
Merged
6 changes: 6 additions & 0 deletions doc/release/1.15.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,12 @@ Like ``np.percentile`` and ``np.nanpercentile``, but takes quantiles in [0, 1]
rather than percentiles in [0, 100]. ``np.percentile`` is now a thin wrapper
around ``np.quantile`` with the extra step of dividing by 100.

``pathlib.Path`` support for more functions
-------------------------------------------
The ``np.core.records.fromfile`` function now supports ``pathlib.Path``
objects in addition to a string or a file object. Furthermore, the
``np.load`` function now also supports ``pathlib.Path`` objects when
using memory mapping (``mmap_mode`` keyword argument).

Build system
------------
Expand Down
11 changes: 7 additions & 4 deletions numpy/core/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

from . import numeric as sb
from . import numerictypes as nt
from numpy.compat import isfileobj, bytes, long
from numpy.compat import isfileobj, bytes, long, is_pathlib_path
from .arrayprint import get_printoptions

# All of the functions allow formats to be a dtype
Expand Down Expand Up @@ -737,9 +737,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
"""Create an array from binary file data

If file is a string then that file is opened, else it is assumed
to be a file object. The file object must support random access
(i.e. it must have tell and seek methods).
If file is a string or a pathlib.Path then that file is opened,
else it is assumed to be a file object. The file object must
support random access (i.e. it must have tell and seek methods).

>>> from tempfile import TemporaryFile
>>> a = np.empty(10,dtype='f8,i4,a5')
Expand Down Expand Up @@ -767,6 +767,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
if isinstance(fd, str):
name = 1
fd = open(fd, 'rb')
elif is_pathlib_path(fd):
name = 1
fd = fd.open('rb')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, what happens when fd is a unicode string on python2?

if (offset > 0):
fd.seek(offset, 1)
size = get_remaining_size(fd)
Expand Down
21 changes: 20 additions & 1 deletion numpy/core/tests/test_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
import pytest

import numpy as np
from numpy.compat import Path
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_array_almost_equal,
assert_raises, assert_warns
assert_raises, assert_warns, temppath
)


Expand Down Expand Up @@ -325,6 +326,24 @@ def test_zero_width_strings(self):
assert_equal(rec['f1'], [b'', b'', b''])


@pytest.mark.skipif(Path is None, reason="No pathlib.Path")
class TestPathUsage(object):
# Test that pathlib.Path can be used
def test_tofile_fromfile(self):
with temppath(suffix='.bin') as path:
path = Path(path)
a = np.empty(10, dtype='f8,i4,a5')
a[5] = (0.5,10,'abcde')
a.newbyteorder('<')
with path.open("wb") as fd:
a.tofile(fd)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ndarray.tofile() discards information on endianness, and most recent ARM processors feature switchable endianness (bi-endian).

I wonder if this is contributing to #12330.

Certainly this test has just started failing stochastically on shippable / ARM for 2.7 / 3.7, so it may not have been stable in the first place given how recently this was merged.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't the problem -- I'm preparing the patch now in the linked PR, which is related to testing precision and np.empty() as far as I can tell.

x = np.core.records.fromfile(path,
formats='f8,i4,a5',
shape=10,
byteorder='<')
assert_array_equal(x, a)


class TestRecord(object):
def setup(self):
self.data = np.rec.fromrecords([(1, 2, 3), (4, 5, 6)],
Expand Down
22 changes: 15 additions & 7 deletions numpy/lib/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@
import io
import warnings
from numpy.lib.utils import safe_eval
from numpy.compat import asbytes, asstr, isfileobj, long, basestring
from numpy.compat import (
asbytes, asstr, isfileobj, long, basestring, is_pathlib_path
)

if sys.version_info[0] >= 3:
import pickle
Expand Down Expand Up @@ -709,7 +711,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,

Parameters
----------
filename : str
filename : str or pathlib.Path instance
The name of the file on disk. This may *not* be a file-like
object.
mode : str, optional
Expand Down Expand Up @@ -750,9 +752,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
memmap

"""
if not isinstance(filename, basestring):
raise ValueError("Filename must be a string. Memmap cannot use"
" existing file handles.")
if not (isinstance(filename, basestring) or is_pathlib_path(filename)):
raise ValueError("Filename must be a string or a pathlib.Path."
" Memmap cannot use existing file handles.")

if 'w' in mode:
# We are creating the file, not reading it.
Expand All @@ -770,7 +772,10 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
shape=shape,
)
# If we got here, then it should be safe to create the file.
fp = open(filename, mode+'b')
if is_pathlib_path(filename):
fp = filename.open(mode+'b')
else:
fp = open(filename, mode+'b')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better as

from numpy.compat import os_fspath

fp = open(os_fspath(filename), mode+'b')

Since this handles PurePath and third-party path-like objects.

try:
used_ver = _write_array_header(fp, d, version)
# this warning can be removed when 1.9 has aged enough
Expand All @@ -782,7 +787,10 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
fp.close()
else:
# Read the header of the file first.
fp = open(filename, 'rb')
if is_pathlib_path(filename):
fp = filename.open('rb')
else:
fp = open(filename, 'rb')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here

try:
version = read_magic(fp)
_check_version(version)
Expand Down
25 changes: 24 additions & 1 deletion numpy/lib/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2252,11 +2252,34 @@ def test_loadtxt(self):
assert_array_equal(x, a)

def test_save_load(self):
# Test that pathlib.Path instances can be used with savez.
# Test that pathlib.Path instances can be used with save.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
data = np.load(path)
assert_array_equal(data, a)

def test_save_load_memmap(self):
# Test that pathlib.Path instances can be loaded mem-mapped.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
data = np.load(path, mmap_mode='r')
assert_array_equal(data, a)
del data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale for this line? A comment might be nice

Copy link
Contributor Author
@paulmueller paulmueller Oct 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required so that numpy closes the mem-mapped file. Otherwise you will get weird errors on Windows.


def test_save_load_memmap_readwrite(self):
# Test that pathlib.Path instances can be written mem-mapped.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
b = np.load(path, mmap_mode='r+')
a[0][0] = 5
b[0][0] = 5
del b # closes the file
data = np.load(path)
assert_array_equal(data, a)

Expand Down
0