8000 使用kmp算法实现字符串搜索 · ssjssh/javaalgorithm@0647808 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0647808

Browse files
author
shengshijun
committed
使用kmp算法实现字符串搜索
1 parent c203d5e commit 0647808

File tree

2 files changed

+138
-8
lines changed

2 files changed

+138
-8
lines changed

src/main/java/ssj/algorithm/string/StringUntil.java

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,63 @@ public static String compress(String str) {
165165
return (com_str.length() < str.length()) ? com_str : str;
166166
}
167167

168+
/**
169+
* kmp算法实现,返回第一个匹配发生的位置
170+
*
171+
* @param str
172+
8000 * @param pattern
173+
* @return
174+
*/
175+
public static int search(String str, String pattern) {
176+
Preconditions.checkNotNull(str);
177+
Preconditions.checkNotNull(pattern);
178+
Preconditions.checkArgument(!pattern.isEmpty());
179+
180+
int[] next_arr = buildNext(pattern);
181+
182+
for (int cur_index = 0, pattern_index = 0; cur_index < str.length(); cur_index++) {
183+
while (pattern_index > 0 && str.charAt(cur_index) != pattern.charAt(pattern_index)) {
184+
pattern_index = next_arr[pattern_index - 1];
185+
}
186+
187+
if (pattern.charAt(pattern_index) == str.charAt(cur_index)) {
188+
pattern_index++;
189+
}
190+
191+
if (pattern_index == pattern.length()) {
192+
return cur_index - pattern_index + 1;
193+
}
194+
}
168195

169-
public int search(String str) {
170-
//TODO KMP实现字符查找
171196
return -1;
172197
}
173198

199+
/**
200+
* 创建数组对应移动表
201+
* A B C D A B D
202+
* 0 0 0 0 1 2 0
203+
* 使用这种表示格式近似表示移动位置,但是在不匹配发生的时候都是用前面元素的位置作为移动
204+
* 比如如果最后一个D发送不匹配时,使用B的移动位置也就是2移动
205+
*
206+
* @param pattern
207+
* @return
208+
*/
209+
private static int[] buildNext(String pattern) {
210+
int[] result = new int[pattern.length()];
211+
for (int cur_index = 2, last_match = 0; cur_index < pattern.length(); cur_index++) {
212+
while (last_match > 0 && pattern.charAt(cur_index) != pattern.charAt(last_match)) {
213+
last_match = result[last_match];
214+
}
215+
216+
if (pattern.charAt(cur_index) == pattern.charAt(last_match)) {
217+
last_match++;
218+
}
219+
220+
result[cur_index] = last_match;
221+
}
222+
return result;
223+
}
224+
174225
/**
175226
* 要处理变位词需要对输入的单词进行两次排序
176227
* 1, 按照字母表顺序对每一个串中的字母进行排序

src/test/java/ssj/algorithm/Main.java

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,97 @@
11
package ssj.algorithm;
22

3-
import ssj.algorithm.collections.AVLTree;
3+
import ssj.algorithm.string.StringUntil;
44

55
/**
66
* Created by shenshijun on 15/2/1.
77
*/
88
public class Main {
99

1010
public static void main(String[] args) {
11-
int[] data = new int[]{1, 2, 3, 4, 34, 1, 234};
12-
AVLTree<Integer> tree = new AVLTree<>();
13-
for (int i : data) {
14-
tree.add(i);
11+
System.out.println(StringUntil.search("BBC ABCDAB ABCDABCDABDE", "ABCDABD"));
12+
}
13+
14+
/**
15+
* Returns the index within this string of the first occurrence of the
16+
* specified substring. If it is not a substring, return -1.
17+
*
18+
* @param haystack The string to be scanned
19+
* @param needle The target string to search
20+
* @return The start index of the substring
21+
*/
22+
public static int indexOf(char[] haystack, char[] needle) {
23+
if (needle.length == 0) {
< 6D40 /code>24+
return 0;
25+
}
26+
int charTable[] = makeCharTable(needle);
27+
int offsetTable[] = makeOffsetTable(needle);
28+
for (int i = needle.length - 1, j; i < haystack.length; ) {
29+
for (j = needle.length - 1; needle[j] == haystack[i]; --i, --j) {
30+
if (j == 0) {
31+
return i;
32+
}
33+
}
34+
// i += needle.length - j; // For naive method
35+
i += Math.max(offsetTable[needle.length - 1 - j], charTable[haystack[i]]);
36+
}
37+
return -1;
38+
}
39+
40+
/**
41+
* Makes the jump table based on the mismatched character information.
42+
*/
43+
private static int[] makeCharTable(char[] needle) {
44+
final int ALPHABET_SIZE = 256;
45+
int[] table = new int[ALPHABET_SIZE];
46+
for (int i = 0; i < table.length; ++i) {
47+
table[i] = needle.length;
48+
}
49+
for (int i = 0; i < needle.length - 1; ++i) {
50+
table[needle[i]] = needle.length - 1 - i;
51+
}
52+
return table;
53+
}
54+
55+
/**
56+
* Makes the jump table based on the scan offset which mismatch occurs.
57+
*/
58+
private static int[] makeOffsetTable(char[] needle) {
59+
int[] table = new int[needle.length];
60+
int lastPrefixPosition = needle.length;
61+
for (int i = needle.length - 1; i >= 0; --i) {
62+
if (isPrefix(needle, i + 1)) {
63+
lastPrefixPosition = i + 1;
64+
}
65+
table[needle.length - 1 - i] = lastPrefixPosition - i + needle.length - 1;
66+
}
67+
for (int i = 0; i < needle.length - 1; ++i) {
68+
int slen = suffixLength(needle, i);
69+
table[slen] = needle.length - 1 - i + slen;
70+
}
71+
return table;
72+
}
73+
74+
/**
75+
* Is needle[p:end] a prefix of needle?
76+
*/
77+
private static boolean isPrefix(char[] needle, int p) {
78+
for (int i = p, j = 0; i < needle.length; ++i, ++j) {
79+
if (needle[i] != needle[j]) {
80+
return false;
81+
}
82+
}
83+
return true;
84+
}
85+
86+
/**
87+
* Returns the maximum length of the substring ends at p and is a suffix.
88+
*/
89+
private static int suffixLength(char[] needle, int p) {
90+
int len = 0;
91+
for (int i = p, j = needle.length - 1;
92+
i >= 0 && needle[i] == needle[j]; --i, --j) {
93+
len += 1;
1594
}
16-
System.out.println(tree);
95+
return len;
1796
}
1897
}

0 commit comments

Comments
 (0)
0