use memoryviews instead of ndarrays by jbrockmendel · Pull Request #22147 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

use memoryviews instead of ndarrays #22147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 1, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
use memoryviews instead of ndarrays
  • Loading branch information
jbrockmendel committed Jul 31, 2018
commit 59140c36cecaa9e6e6ec76048f4ff103c1d132c3
8 changes: 4 additions & 4 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import cython

import numpy as np
from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
from numpy cimport uint8_t, uint32_t, uint64_t

from util cimport _checknull
from cpython cimport (PyBytes_Check,
Expand All @@ -17,7 +17,7 @@ DEF dROUNDS = 4


@cython.boundscheck(False)
def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
def hash_object_array(object[:] arr, object key, object encoding='utf8'):
"""
Parameters
----------
Expand All @@ -37,7 +37,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
"""
cdef:
Py_ssize_t i, l, n
ndarray[uint64_t] result
uint64_t[:] result
bytes data, k
uint8_t *kb
uint64_t *lens
Expand Down Expand Up @@ -89,7 +89,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):

free(vecs)
free(lens)
return result
return result.base # .base to retrieve underlying np.ndarray


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ cdef inline object create_time_from_ts(
return time(dts.hour, dts.min, dts.sec, dts.us)


def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
box="datetime"):
def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"):
"""
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp

Expand All @@ -102,7 +101,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,

cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t pos
npy_datetimestruct dts
object dt
int64_t value, delta
Expand Down
22 changes: 14 additions & 8 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
Sets obj.tzinfo inplace, alters obj.dts inplace.
"""
cdef:
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
int64_t local_val
Py_ssize_t pos

Expand Down Expand Up @@ -632,14 +633,15 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
Py_ssize_t n = len(values)
Py_ssize_t i, j, pos
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] tt, trans, deltas
ndarray[int64_t] tt, trans
int64_t[:] deltas
ndarray[Py_ssize_t] posn
int64_t v

trans, deltas, typ = get_dst_info(tz)
if not to_utc:
# We add `offset` below instead of subtracting it
deltas = -1 * deltas
deltas = -1 * deltas.base # `.base` to access underlying ndarray

tt = values[values != NPY_NAT]
if not len(tt):
Expand Down Expand Up @@ -728,7 +730,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
converted: int64
"""
cdef:
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t pos
int64_t v, offset, utc_date
npy_datetimestruct dts
Expand Down Expand Up @@ -843,7 +846,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
localized : ndarray[int64_t]
"""
cdef:
ndarray[int64_t] trans, deltas, idx_shifted
ndarray[int64_t] trans, idx_shifted
int64_t[:] deltas
ndarray ambiguous_array
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
int64_t *tdata
Expand Down Expand Up @@ -1124,7 +1128,8 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
cdef:
Py_ssize_t n = len(stamps)
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t delta
Expand Down Expand Up @@ -1190,7 +1195,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:
return dtstruct_to_dt64(dts)


def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
def is_date_array_normalized(int64_t[:] stamps, tz=None):
"""
Check if all of the given (nanosecond) timestamps are normalized to
midnight, i.e. hour == minute == second == 0. If the optional timezone
Expand All @@ -1207,7 +1212,8 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
"""
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
npy_datetimestruct dts
int64_t local_val, delta

Expand Down
37 changes: 18 additions & 19 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ from cpython.datetime cimport datetime
import time

import numpy as np
from numpy cimport ndarray

# Avoid import from outside _libs
if sys.version_info.major == 2:
Expand Down Expand Up @@ -381,11 +380,11 @@ cpdef object _get_rule_month(object source, object default='DEC'):
# Parsing for type-inference


def try_parse_dates(ndarray[object] values, parser=None,
def try_parse_dates(object[:] values, parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(values)
result = np.empty(n, dtype='O')
Expand Down Expand Up @@ -420,15 +419,15 @@ def try_parse_dates(ndarray[object] values, parser=None,
# raise if passed parser and it failed
raise

return result
return result.base # .base to access underlying ndarray


def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
def try_parse_date_and_time(object[:] dates, object[:] times,
date_parser=None, time_parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(dates)
if len(times) != n:
Expand Down Expand Up @@ -457,14 +456,14 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
result[i] = datetime(d.year, d.month, d.day,
t.hour, t.minute, t.second)

return result
return result.base # .base to access underlying ndarray


def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
ndarray[object] days):
def try_parse_year_month_day(object[:] years, object[:] months,
object[:] days):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(years)
if len(months) != n or len(days) != n:
Expand All @@ -474,19 +473,19 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
for i in range(n):
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))

return result
return result.base # .base to access underlying ndarray


def try_parse_datetime_components(ndarray[object] years,
ndarray[object] months,
ndarray[object] days,
ndarray[object] hours,
ndarray[object] minutes,
ndarray[object] seconds):
def try_parse_datetime_components(object[:] years,
object[:] months,
object[:] days,
object[:] hours,
object[:] minutes,
object[:] seconds):

cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result
int secs
double float_secs
double micros
Expand All @@ -509,7 +508,7 @@ def try_parse_datetime_components(ndarray[object] years,
int(hours[i]), int(minutes[i]), secs,
int(micros))

return result
return result.base # .base to access underlying ndarray


# ----------------------------------------------------------------------
Expand Down
39 changes: 20 additions & 19 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,13 @@ cdef inline int month_to_quarter(int month):

@cython.wraparound(False)
@cython.boundscheck(False)
def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None):
def dt64arr_to_periodarr(int64_t[:] dtarr, int freq, tz=None):
"""
Convert array of datetime64 values (passed in as 'i8' dtype) to a set of
periods corresponding to desired frequency, per period convention.
"""
cdef:
ndarray[int64_t] out
int64_t[:] out
Py_ssize_t i, l
npy_datetimestruct dts

Expand All @@ -447,18 +447,18 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None):
out[i] = get_period_ordinal(&dts, freq)
else:
out = localize_dt64arr_to_period(dtarr, freq, tz)
return out
return out.base # .base to access underlying np.ndarray


@cython.wraparound(False)
@cython.boundscheck(False)
def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):
def periodarr_to_dt64arr(int64_t[:] periodarr, int freq):
"""
Convert array to datetime64 values from a set of ordinals corresponding to
periods per period convention.
"""
cdef:
ndarray[int64_t] out
int64_t[:] out
Py_ssize_t i, l

l = len(periodarr)
Expand All @@ -472,7 +472,7 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):
continue
out[i] = period_ordinal_to_dt64(periodarr[i], freq)

return out
return out.base # .base to access underlying np.ndarray


cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end):
Expand Down Expand Up @@ -556,7 +556,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
if upsampling, choose to use start ('S') or end ('E') of period.
"""
cdef:
ndarray[int64_t] result
int64_t[:] result
Py_ssize_t i, n
freq_conv_func func
asfreq_info af_info
Expand Down Expand Up @@ -584,7 +584,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
raise ValueError("Unable to convert to desired frequency.")
result[i] = val

return result
return result.base # .base to access underlying np.ndarray


cpdef int64_t period_ordinal(int y, int m, int d, int h, int min,
Expand Down Expand Up @@ -825,10 +825,10 @@ cdef int pdays_in_month(int64_t ordinal, int freq):
return ccalendar.get_days_in_month(dts.year, dts.month)


def get_period_field_arr(int code, ndarray[int64_t] arr, int freq):
def get_period_field_arr(int code, int64_t[:] arr, int freq):
cdef:
Py_ssize_t i, sz
ndarray[int64_t] out
int64_t[:] out
accessor f

func = _get_accessor_func(code)
Expand All @@ -844,7 +844,7 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq):
continue
out[i] = func(arr[i], freq)

return out
return out.base # .base to access underlying np.ndarray


cdef accessor _get_accessor_func(int code):
Expand Down Expand Up @@ -875,10 +875,10 @@ cdef accessor _get_accessor_func(int code):
return NULL


def extract_ordinals(ndarray[object] values, freq):
def extract_ordinals(object[:] values, freq):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64)
int64_t[:] ordinals = np.empty(n, dtype=np.int64)
object p

freqstr = Period._maybe_convert_freq(freq).freqstr
Expand All @@ -904,10 +904,10 @@ def extract_ordinals(ndarray[object] values, freq):
else:
ordinals[i] = p.ordinal

return ordinals
return ordinals.base # .base to access underlying np.ndarray


def extract_freq(ndarray[object] values):
def extract_freq(object[:] values):
cdef:
Py_ssize_t i, n = len(values)
object p
Expand All @@ -930,12 +930,13 @@ def extract_freq(ndarray[object] values):

@cython.wraparound(False)
@cython.boundscheck(False)
cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
int freq, object tz):
cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps,
int freq, object tz):
cdef:
Py_ssize_t n = len(stamps)
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans, deltas
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t local_val
Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cdef int RESO_DAY = 6

# ----------------------------------------------------------------------

cpdef resolution(ndarray[int64_t] stamps, tz=None):
cpdef resolution(int64_t[:] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
npy_datetimestruct dts
Expand All @@ -42,11 +42,12 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
return _reso_local(stamps, tz)


cdef _reso_local(ndarray[int64_t] stamps, object tz):
cdef _reso_local(int64_t[:] stamps, object tz):
cdef:
Py_ssize_t i, n = len(stamps)
int reso = RESO_DAY, curr_reso
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t local_val, delta
Expand Down
Loading
0