ENH: Improve support for pathlib.Path objects in load functions by paulmueller · Pull Request #11348 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Improve support for pathlib.Path objects in load functions #11348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 1, 2018
Merged
7 changes: 7 additions & 0 deletions doc/release/1.16.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,13 @@ Speedup ``np.take`` for read-only arrays
The implementation of ``np.take`` no longer makes an unnecessary copy of the
source array when its ``writeable`` flag is set to ``False``.

Support path-like objects for more functions
--------------------------------------------
The ``np.core.records.fromfile`` function now supports ``pathlib.Path``
and other path-like objects in addition to a file object. Furthermore, the
``np.load`` function now also supports path-like objects when
using memory mapping (``mmap_mode`` keyword argument).


Changes
=======
Expand Down
18 changes: 11 additions & 7 deletions numpy/core/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

from . import numeric as sb
from . import numerictypes as nt
from numpy.compat import isfileobj, bytes, long, unicode
from numpy.compat import isfileobj, bytes, long, unicode, os_fspath
from .arrayprint import get_printoptions

# All of the functions allow formats to be a dtype
Expand Down Expand Up @@ -737,9 +737,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
"""Create an array from binary file data

If file is a string then that file is opened, else it is assumed
to be a file object. The file object must support random access
(i.e. it must have tell and seek methods).
If file is a string or a path-like object then that file is opened,
else it is assumed to be a file object. The file object must
support random access (i.e. it must have tell and seek methods).

>>> from tempfile import TemporaryFile
>>> a = np.empty(10,dtype='f8,i4,a5')
Expand All @@ -763,10 +763,14 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
elif isinstance(shape, (int, long)):
shape = (shape,)

name = 0
if isinstance(fd, str):
if isfileobj(fd):
# file already opened
name = 0
else:
# open file
fd = open(os_fspath(fd), 'rb')
name = 1
fd = open(fd, 'rb')

if (offset > 0):
fd.seek(offset, 1)
size = get_remaining_size(fd)
Expand Down
21 changes: 20 additions & 1 deletion numpy/core/tests/test_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
import pytest

import numpy as np
from numpy.compat import Path
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_array_almost_equal,
assert_raises, assert_warns
assert_raises, assert_warns, temppath
)
from numpy.core.numeric import pickle

Expand Down Expand Up @@ -325,6 +326,24 @@ def test_zero_width_strings(self):
assert_equal(rec['f1'], [b'', b'', b''])


@pytest.mark.skipif(Path is None, reason="No pathlib.Path")
class TestPathUsage(object):
# Test that pathlib.Path can be used
def test_tofile_fromfile(self):
with temppath(suffix='.bin') as path:
path = Path(path)
a = np.empty(10, dtype='f8,i4,a5')
a[5] = (0.5,10,'abcde')
a.newbyteorder('<')
with path.open("wb") as fd:
a.tofile(fd)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ndarray.tofile() discards information on endianness, and most recent ARM processors feature switchable endianness (bi-endian).

I wonder if this is contributing to #12330.

Certainly this test has just started failing stochastically on shippable / ARM for 2.7 / 3.7, so it may not have been stable in the first place given how recently this was merged.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't the problem -- I'm preparing the patch now in the linked PR, which is related to testing precision and np.empty() as far as I can tell.

x = np.core.records.fromfile(path,
formats='f8,i4,a5',
shape=10,
byteorder='<')
assert_array_equal(x, a)


class TestRecord(object):
def setup(self):
self.data = np.rec.fromrecords([(1, 2, 3), (4, 5, 6)],
Expand Down
16 changes: 9 additions & 7 deletions numpy/lib/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@
import io
import warnings
from numpy.lib.utils import safe_eval
from numpy.compat import asbytes, asstr, isfileobj, long, basestring
from numpy.compat import (
asbytes, asstr, isfileobj, long, os_fspath
)
from numpy.core.numeric import pickle


Expand Down Expand Up @@ -706,7 +708,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,

Parameters
----------
filename : str
filename : str or path-like
The name of the file on disk. This may *not* be a file-like
object.
mode : str, optional
Expand Down Expand Up @@ -747,9 +749,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
memmap

"""
if not isinstance(filename, basestring):
raise ValueError("Filename must be a string. Memmap cannot use"
" existing file handles.")
if isfileobj(filename):
raise ValueError("Filename must be a string or a path-like object."
" Memmap cannot use existing file handles.")

if 'w' in mode:
# We are creating the file, not reading it.
Expand All @@ -767,7 +769,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
shape=shape,
)
# If we got here, then it should be safe to create the file.
fp = open(filename, mode+'b')
fp = open(os_fspath(filename), mode+'b')
try:
used_ver = _write_array_header(fp, d, version)
# this warning can be removed when 1.9 has aged enough
Expand All @@ -779,7 +781,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
fp.close()
else:
# Read the header of the file first.
fp = open(filename, 'rb')
fp = open(os_fspath(filename), 'rb')
try:
version = read_magic(fp)
_check_version(version)
Expand Down
26 changes: 25 additions & 1 deletion numpy/lib/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2295,11 +2295,35 @@ def test_loadtxt(self):
assert_array_equal(x, a)

def test_save_load(self):
# Test that pathlib.Path instances can be used with savez.
# Test that pathlib.Path instances can be used with save.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
data = np.load(path)
assert_array_equal(data, a)

def test_save_load_memmap(self):
# Test that pathlib.Path instances can be loaded mem-mapped.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
data = np.load(path, mmap_mode='r')
assert_array_equal(data, a)
# close the mem-mapped file
del data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale for this line? A comment might be nice

Copy link
Contributor Author
@paulmueller paulmueller Oct 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required so that numpy closes the mem-mapped file. Otherwise you will get weird errors on Windows.


def test_save_load_memmap_readwrite(self):
# Test that pathlib.Path instances can be written mem-mapped.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
np.save(path, a)
b = np.load(path, mmap_mode='r+')
a[0][0] = 5
b[0][0] = 5
del b # closes the file
data = np.load(path)
assert_array_equal(data, a)

Expand Down
0