0%

数据结构与算法之字符串的模式匹配

本节主要介绍了栈和队列的实现及其应用。

朴素模式匹配

image-20221212123315037

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
//在主串s中找子串t,若找到返回字串在主串中的索引;若没找到返回-1
#include <iostream>
#include <string>

using namespace std;

int Index(string s, string t){
int lens = s.length();//计算串s、t的长度
int lent = t.length();
int i = 0;
int j = 0;
while (i < lens&&j < lent){//如果i、j都各自小于lens和lent
if (t[j] == s[i]){//如果子串的t[j]和主串的s[i]相等
++i;//各自索引都自增
++j;
}
else{//否则,主串的索引比刚开始后移一个;子串的索引变为0
i = i - j + 1;
j = 0;
}
}
if (j == lent){//如果最后j和lent的大小一样,证明找到了,返回子串在主串中的索引
return i - lent;
}
else{//否则返回-1
return -1;
}
}

int main(){
string s = "goodgoogle";
string t = "google";
int pos = Index(s, t);
if (pos != -1){
cout << "find " << t << " at the index " << pos << " of " << s << endl;
}
else{
cout << "can't find " << t << " in " << s << endl;
}
return 0;
}

结果如下:

1
find google at the index 4 of goodgoogle

KMP算法

  • 算法核心:通过next数组控制出现不匹配时字串回退的位置

  • next数组中存储的值为最长公共前后缀+1

    image-20221212161105967

    image-20221212161233960

  • 公共前后缀:

    image-20221212160823648

    image-20221212160844675

  • 具体原理解释见:【天勤考研】KMP算法易懂版_哔哩哔哩_bilibili

  • 设置next[0]=0时:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    //在主串s中找子串t,若找到返回字串在主串中的索引;若没找到返回-1
    #include <iostream>
    #include <string>

    using namespace std;

    void get_next(string t, int *next);
    int Index_KMP(string s, string t);

    int main(){
    string s = "ababaacababaaababaa";
    string t = "ababaaababaa";
    int pos = Index_KMP(s, t);
    if (pos != -1){
    cout << "find " << t << " at the index " << pos << " of " << s << endl;
    }
    else{
    cout << "can't find " << t << " in " << s << endl;
    }

    return 0;
    }

    int Index_KMP(string s, string t){
    int lens = s.length();
    int lent = t.length();
    int *next = new int[lent];
    get_next(t, next); //对子串t作分析,得到next数组
    cout << "next: "; //输出next测试而已
    for (int i = 0; i < lent; ++i){
    cout << next[i];
    }
    cout << endl;
    int i = 0;
    int j = 0;
    while (i < lens&&j < lent){//两字母相等则继续,与朴素算法增加了j==0的判断
    if (j == 0 || t[j] == s[i]){
    ++i;
    ++j;
    }
    else{
    j = next[j]-1;//j退回合适位置,i值不变
    cout<<"new j:"<<j<<endl;
    }
    }
    if (j == lent){//如果最j和lent的大小一样,证明找到了,返回子串在主串中的索引
    return i - lent;
    }
    else{//否则返回-1
    return -1;
    }
    }

    void get_next(string t, int *next){
    int lent = t.length();
    int i = 0;
    int j = 0;
    next[0] = 0;
    while (i < lent){//i小于t的长度
    if (j == 0 || t[i] == t[j - 1]){//t[i]表示后缀的单个字符
    ++i; //t[j]表示前缀的单个字符
    ++j;
    next[i] = j;
    }
    else{
    j = next[j - 1]; //若字符不相同,则j值回溯
    }
    }
    }

    结果如下:

    1
    2
    3
    4
    5
    next: 011234223456
    new j:1
    new j:0
    new j:0
    find ababaaababaa at the index 7 of ababaacababaaababaa
  • 设置next[0]=-1时

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    //在主串s中找子串t,若找到返回字串在主串中的索引;若没找到返回-1
    #include <iostream>
    #include <string>

    using namespace std;

    void get_next(string t, int *next);
    int Index_KMP(string s, string t);

    int main(){
    string s = "ababaacababaaababaa";
    string t = "ababaaababaa";
    int pos = Index_KMP(s, t);
    if (pos != -1){
    cout << "find " << t << " at the index " << pos << " of " << s << endl;
    }
    else{
    cout << "can't find " << t << " in " << s << endl;
    }

    return 0;
    }

    int Index_KMP(string s, string t){
    int lens = s.length();
    int lent = t.length();
    int *next = new int[lent];
    get_next(t, next); //对子串t作分析,得到next数组
    cout << "next: "; //输出next测试而已
    for (int i = 0; i < lent; ++i){
    cout << next[i];
    }
    cout << endl;
    int i = 0;
    int j = 0;
    while (i < lens&&j < lent){//两字母相等则继续,与朴素算法增加了j==-1的判断
    if (j == -1 || t[j] == s[i]){
    ++i;
    ++j;
    }
    else{
    j = next[j];//j退回合适位置,i值不变
    cout<<"new j:"<<j<<endl;
    }
    }
    if (j == lent){//如果最j和lent的大小一样,证明找到了,返回子串在主串中的索引
    return i - lent;
    }
    else{//否则返回-1
    return -1;
    }
    }

    void get_next(string t, int *next){
    int lent = t.length();
    int i = 0;
    int j = -1;
    next[0] = -1;
    while (i < lent-1){//i小于t的长度
    if (j == -1 || t[i] == t[j]){//t[i]表示后缀的单个字符,t[j]表示前缀的单个字符
    next[++i] = ++j;
    }
    else{
    j = next[j]; //若字符不相同,则j值回溯
    }
    }
    }

    结果如下:

    1
    2
    3
    4
    5
    next: -100123112345
    new j:1
    new j:0
    new j:-1
    find ababaaababaa at the index 7 of ababaacababaaababaa
欢迎来到ssy的世界