py/objstr: Don't treat bytes as unicode in str.count. · micropython/micropython@9d6f474 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9d6f474

Browse files
jimmo authored and dpgeorge committed
py/objstr: Don't treat bytes as unicode in str.count.
`b'\xaa \xaa'.count(b'\xaa')` now (correctly) returns 2 instead of 1. Fixes issue #9404. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
1 parent dd9dcb5 commit 9d6f474

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

py/objstr.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1768,14 +1768,16 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
17681768
return MP_OBJ_NEW_SMALL_INT(utf8_charlen(start, end - start) + 1);
17691769
}
17701770

1771+
bool is_str = self_type == &mp_type_str;
1772+
17711773
// count the occurrences
17721774
mp_int_t num_occurrences = 0;
17731775
for (const byte *haystack_ptr = start; haystack_ptr + needle_len <= end;) {
17741776
if (memcmp(haystack_ptr, needle, needle_len) == 0) {
17751777
num_occurrences++;
17761778
haystack_ptr += needle_len;
17771779
} else {
1778-
haystack_ptr = utf8_next_char(haystack_ptr);
1780+
haystack_ptr = is_str ? utf8_next_char(haystack_ptr) : haystack_ptr + 1;
17791781
}
17801782
}
17811783

tests/basics/bytes_count.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@
4848
print(b"aaaa".count(b'a', -1, 5))
4949
print(b"abbabba".count(b"abba"))
5050

51+
print(b'\xaa \xaa'.count(b'\xaa'))
52+
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'))
53+
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'), 1)
54+
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'), 2)
55+
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'), 1, 3)
56+
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'), 2, 3)
57+
5158
def t():
5259
return True
5360

0 commit comments

Comments
 (0)
0