ENH: Add replace ufunc for bytes and unicode dtypes · numpy/numpy@19396d2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 19396d2

Browse files
committed
ENH: Add replace ufunc for bytes and unicode dtypes
1 parent 4dc749e commit 19396d2

File tree

7 files changed

+287
-125
lines changed

7 files changed

+287
-125
lines changed

numpy/_core/code_generators/generate_umath.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1187,6 +1187,11 @@ def english_upper(s):
11871187
docstrings.get('numpy._core.umath.count'),
11881188
None,
11891189
),
1190+
'_replace_impl':
1191+
Ufunc(4, 1, None,
1192+
docstrings.get('numpy.core.umath._replace_impl'),
1193+
None,
1194+
),
11901195
'startswith':
11911196
Ufunc(4, 1, False_,
11921197
docstrings.get('numpy._core.umath.startswith'),

numpy/_core/code_generators/ufunc_docstrings.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4558,6 +4558,13 @@ def add_newdoc(place, name, doc):
45584558
45594559
""")
45604560

4561+
add_newdoc('numpy.core.umath', '_replace_impl',
4562+
"""
4563+
UFunc implementation of ``replace``. This internal function
4564+
is called by ``replace`` with ``out`` set, so that the
4565+
size of the resulting string buffer is known.
4566+
""")
4567+
45614568
add_newdoc('numpy._core.umath', 'startswith',
45624569
"""
45634570
Returns a boolean array which is `True` where the string element

numpy/_core/defchararray.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1267,8 +1267,8 @@ def replace(a, old, new, count=None):
12671267
>>> np.char.replace(a, 'is', 'was')
12681268
array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
12691269
"""
1270-
return _to_bytes_or_str_array(
1271-
_vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
1270+
count = count if count is not None else numpy.iinfo(numpy.int64).max
1271+
return numpy._core.umath.replace(a, old, new, count)
12721272

12731273

12741274
@array_function_dispatch(_count_dispatcher)
@@ -2516,7 +2516,7 @@ def replace(self, old, new, count=None):
25162516
char.replace
25172517
25182518
"""
2519-
return asarray(replace(self, old, new, count))
2519+
return replace(self, old, new, count)
25202520

25212521
def rfind(self, sub, start=0, end=None):
25222522
"""

numpy/_core/src/umath/string_buffer.h

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,17 @@ struct Buffer {
6969
return (npy_int64) (tmp - *this + 1);
7070
}
7171

72+
inline npy_int64
73+
rstrip()
74+
{
75+
Buffer tmp(after, 0);
76+
tmp--;
77+
while (tmp >= *this && (*tmp == '\0' || NumPyOS_ascii_isspace(*tmp))) {
78+
tmp--;
79+
}
80+
return (npy_int64) (tmp - *this + 1);
81+
}
82+
7283
inline Buffer<enc>&
7384
operator+=(npy_int64 rhs)
7485
{
@@ -173,28 +184,24 @@ struct Buffer {
173184
}
174185

175186
inline void
176-
buffer_memcpy(void *out, size_t n_chars)
187+
buffer_memcpy(Buffer<enc> out, size_t n_chars)
177188
{
178189
switch (enc) {
179190
case ENCODING::ASCII:
180-
memcpy(out, buf, n_chars);
191+
memcpy(out.buf, buf, n_chars);
181192
break;
182193
case ENCODING::UTF32:
183-
memcpy(out, buf, n_chars * sizeof(npy_ucs4));
194+
memcpy(out.buf, buf, n_chars * sizeof(npy_ucs4));
184195
break;
185196
}
186197
}
187198

188199
inline void
189-
buffer_memcpy_with_offset(void *out, size_t offset, size_t n_chars)
200+
buffer_fill_with_zeros(size_t start_index)
190201
{
191-
switch (enc) {
192-
case ENCODING::ASCII:
193-
buffer_memcpy((char *) out + offset, n_chars);
194-
break;
195-
case ENCODING::UTF32:
196-
buffer_memcpy((char *) out + offset * sizeof(npy_ucs4), n_chars);
197-
break;
202+
Buffer<enc> offset = *this + start_index;
203+
for (char *tmp = offset.buf; tmp < after; tmp++) {
204+
*tmp = 0;
198205
}
199206
}
200207

0 commit comments

Comments
 (0)
0