-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Expand file tree
/
Copy pathknuth_morris_pratt.py
More file actions
55 lines (43 loc) · 1.75 KB
/
knuth_morris_pratt.py
File metadata and controls
55 lines (43 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
Knuth-Morris-Pratt String Search
Given two sequences (text and pattern), return the list of start indexes in
text that match the pattern using the KMP algorithm.
Reference: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
Complexity:
Time: O(n + m) where n = len(text), m = len(pattern)
Space: O(m) for the prefix table
"""
from __future__ import annotations
from collections.abc import Sequence
def knuth_morris_pratt(text: Sequence, pattern: Sequence) -> list[int]:
"""Find all occurrences of pattern in text using the KMP algorithm.
Args:
text: The sequence to search in.
pattern: The pattern sequence to search for.
Returns:
A list of starting indices where pattern occurs in text.
Examples:
>>> knuth_morris_pratt('hello there hero!', 'he')
[0, 7, 12]
"""
text_length = len(text)
pattern_length = len(pattern)
prefix_table = [0 for _ in range(pattern_length)]
match_length = 0
for index in range(1, pattern_length):
while match_length and pattern[index] != pattern[match_length]:
match_length = prefix_table[match_length - 1]
if pattern[index] == pattern[match_length]:
match_length += 1
prefix_table[index] = match_length
match_length = 0
matches: list[int] = []
for index in range(text_length):
while match_length and text[index] != pattern[match_length]:
match_length = prefix_table[match_length - 1]
if text[index] == pattern[match_length]:
match_length += 1
if match_length == pattern_length:
matches.append(index - pattern_length + 1)
match_length = prefix_table[match_length - 1]
return matches