后缀自动机(SAM)是一种用于处理字符串的高效数据结构,时间复杂度为$O(|S| * CHARSET_SIZE)$。其应用一般与子串有关,比如求解最长公共子串、求解不同子串的个数、求字典序第k小的子串。
题目传送门
SPOJ LCS Longest Common Substring
题意
给两个字符串,求它们的最长公共子串。
分析
由于字符串长度上限250000,所以$O(n^2)$的算法是行不通的。
我们可以考虑给其中一个字符串建立后缀自动机,该自动机保存了这个字符串的所有子串。接着,我们在这个自动机上跑另一个字符串,假如匹配,就走下一步,假如不匹配,就走失配指针,直到匹配或者回到了根节点。在这个过程中,维护一下匹配的长度,不断取max即可。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; #define okd(d) cout << "ok " << d << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 250009;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN], fail[MAXN], sz, last, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < 26; i++) ch[sz][i] = 0; return sz++; } void insert(int c) { c -= 'a'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < MAXN; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; }
void solve(char *s) { int ans = 0, now = 1, le = strlen(s), cnt = 0; for(int i = 0; i < le; i++) { int to = s[i] - 'a'; if(ch[now][to]) { ans = max(ans, ++cnt); now = ch[now][to]; } else { while(now && !ch[now][to]) now = fail[now]; if(now) { cnt = len[now] + 1; now = ch[now][to]; ans = max(ans, cnt); } else { now = 1; cnt = 0; } } } printf("%d\n", ans); } }sam;
char s[N];
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} sam.init(); scanf("%s", s); int len = strlen(s); for(int i = 0; i < len; i++) sam.insert(s[i]); scanf("%s", s); sam.solve(s);
return 0; }
|
SPOJ LCS2 Longest Common Substring II
题意
给n($2 <= n <= 10$)个字符串,求它们的最长公共子串。
分析
这一题在上一题的基础上进行了一定的拓展。我们还是给其中一个字符串建立后缀自动机,然后将其余字符串在自动机上跑一遍。在每个节点上,我们需要维护一个$f$,表示所有字符串到达该结点时的最大匹配长度,初始化为第一个字符串的在该点的$len$。当其余字符串进来匹配的时候,用当前最大匹配长度来更新这个$f$,不断取min。这里需要注意的是,某个结点$u$被访问到,但其后缀链指向的结点$v$可能没被访问到,但是,根据SAM后缀树的性质,$v$结点表示的最大子串是会被完全匹配的。所以,我们将字符串在自动机跑完一遍后,还需要根据拓扑序再反向更新一下。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; #define okd(d) cout << "ok " << d << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 1e5+9;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN], fail[MAXN], sz, last, cntPos[MAXN], mat[MAXN], f[MAXN]; int tong[MAXN], topo[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < 26; i++) ch[sz][i] = 0; return sz++; } void insert(int c) { c -= 'a'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < MAXN; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void pre() { for(int i = 1; i < sz; i++) mat[i] = INF; } void update(char *s) { int le = strlen(s), lenn = 0, now = 1; for(int i = 1; i < sz; i++) { f[i] = 0; } for(int i = 0; i < le; i++) { int ne = s[i] - 'a'; while(now != 1 && !ch[now][ne]) now = fail[now], lenn = len[now]; if(ch[now][ne]) { now = ch[now][ne]; f[now] = max(f[now], ++lenn); } } for(int i = sz - 1; i >= 1; i--) { if(f[topo[i]]) f[fail[topo[i]]] = len[fail[topo[i]]]; } for(int i = 1; i < sz; i++) { mat[i] = min(mat[i], f[i]); } } void solve() { int ans = 0; for(int i = 1; i < sz; i++) ans = max(ans, mat[i]); printf("%d\n", ans); } }sam;
char s[N]; int len;
int main(void) {
if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} scanf("%s", s); sam.init(); len = strlen(s); for(int i = 0; i < len; i++) { sam.insert(s[i]); } sam.topoSort(); sam.pre(); while(~scanf("%s", s)) { sam.update(s); } sam.solve();
return 0; }
|
SPOJ NSUBSTR Substrings
题意
根据子串的长度,可将一个长为$|S|$的字符串的所有子串分到$|S|$个集合中。问在各个集合中,在母串中出现次数最多的子串的出现次数?
分析
要统计某一子串出现的次数,需要按照拓扑序逆着更新。当访问到某一节点时,我们把最大长度子串所属集合的答案更新一下,然后传递一下$cntPos$给失配指针所指的结点。我们知道,一个结点表示了多个子串,为什么只更新最大长度子串所属的集合就行了呢?可以这样理解,假设某一节点的最大长度子串为$S[L…R]$,根据SAM的性质,$S[L..(R-1)]$一定不与S[L…R]$在同一结点,且它出现的次数不少于$S[L…R]$,因此,后续访问到该结点时会更新$(R-1)$的答案,而长度更短的同理,也是会被更新到。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 250009;
char s[N];
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN] = {-1}, fail[MAXN], sz = 2, last = 1, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; void init() { memset(ch, 0, sizeof ch); memset(fail, 0, sizeof fail); memset(len, 0, sizeof len); memset(tong, 0, sizeof tong); memset(cntPos, 0, sizeof cntPos); last = 1; sz = 2; tong[0] = 1; } void insert(int c) { c -= 'a'; int v = last, u = last = sz++; cntPos[u] = 1; len[u] = len[v] + 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = sz++; len[n] = len[v] + 1; cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < MAXN; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void solve(char *s) { int ans[MAXN]; int le = strlen(s); memset(ans, 0, sizeof ans); for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; ans[len[u]] = max(ans[len[u]], cntPos[u]); cntPos[fail[u]] += cntPos[u]; } for(int i = 1; i <= le; i++) { printf("%d\n", ans[i]); } } }sam;
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} while(~scanf("%s", s)) { sam.init(); int len = strlen(s); for(int i = 0; i < len; i++) { sam.insert(s[i]); } sam.topoSort(); } sam.solve(s);
return 0; }
|
SPOJ SUBLEX Lexicographical Substring Search
题意
给一个字符串,求字典序第k小的子串。
分析
首先给字符串建立SAM,该SAM保存该字符串的所有子串。
接着,从根出发,dfs一遍,对各个结点统计从该结点出发的子串数。
然后,我们再从根出发,贪心地走字符较小的边。假如一条边到达的结点的子串数大于等于k,那么这条边就可以走。否则,我们减去该结点的子串数,然后找另一条边。一直做下去,直到k等于0了就停止。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 1e5+9;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
pii G[MAXN][26];
int ch[MAXN][CHARSET_SIZE], len[MAXN] = {-1}, fail[MAXN], sz = 2, last = 1, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; int path[MAXN], p[MAXN];
struct SuffixAutomaton { void init() { last = 1; sz = 2; tong[0] = 1; } void insert(int c) { c -= 'a'; int v = last, u = last = sz++; cntPos[u] = 1; len[u] = len[v] + 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = sz++; len[n] = len[v] + 1; cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } int cntPath(int u) { if(path[u]) return path[u]; p[u] = 0; for(int i = 0; i < 26; i++) { if(ch[u][i]) { path[u] += cntPath(ch[u][i]); G[u][p[u]++] = pii(ch[u][i], i); } } path[u]++; return path[u]; } void findK(int u, int k) { while(k) { for(int i = 0; i < p[u]; i++) { int v = G[u][i].x; if(v && path[v] >= k) { printf("%c", G[u][i].y + 'a'); k--; u = v; break; } else { k -= path[v]; } } } printf("\n"); } }sam;
char ss[N]; int q, lenn, k;
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} scanf("%s", ss); sam.init(); lenn = strlen(ss); for(int i = 0; i < lenn; i++) sam.insert(ss[i]); scanf("%d", &q); sam.cntPath(1); while(q--) { scanf("%d", &k); sam.findK(1, k); } return 0; }
|
SPOJ COT4 Count on a trie
留坑待填。
HDU 4416 Good Article Good sentence
题意
给一个字符串,可以有很多子串,记为集合A。然后再给一些字符串,同样可以有很多子串,记为集合B。现在需要求集合A和集合B的差的大小,即在A集合里出现但不在B集合里出现的元素个数。
分析
首先给第一个字符串建立SAM,然后依次将其余的字符串取更新这个SAM。
SAM中的各个结点需要保存一个值f,表示被其余字符串匹配到最大长度。对于某一结点u,f初始化为len[fail[u]]。
这里需要注意的是,同多个字符串的最长公共子串问题一样,在匹配完整个字符串后,还需要根据拓扑序逆着更新f一下。
最后,所有节点的len - f的和即为答案。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 1e5+9;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN] = {-1}, fail[MAXN], sz = 2, last = 1, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; int f[MAXN]; bool vis[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < 26; i++) ch[sz][i] = 0; return sz++; }
void insert(int c) { c -= 'a'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < MAXN; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void pre() { memset(vis, 0, sizeof vis); for(int i = 1; i < sz; i++) { f[i] = len[fail[i]]; } } void update(char *s) { int up = strlen(s), now = 1, le = 0; for(int i = 0; i < up; i++) { int v = s[i] - 'a'; while(now != 1 && !ch[now][v]) now = fail[now], le = len[now]; if(ch[now][v]) { now = ch[now][v]; vis[now] = true; le++; f[now] = max(f[now], le); } } } void solve() { for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; if(vis[u]) { vis[fail[u]] = true; f[fail[u]] = len[fail[u]]; } } ll ans = 0; for(int i = 1; i < sz; i++) { ans += len[i] - f[i]; } printf("%lld\n", ans); } }sam;
int T, n, kase; char s[N];
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} scanf("%d", &T); while(T--) { printf("Case %d: ", ++kase); scanf("%d", &n); scanf("%s", s); sam.init(); int len = strlen(s); for(int i = 0; i < len; i++) { sam.insert(s[i]); } sam.pre(); sam.topoSort(); for(int i = 0; i < n; i++) { scanf("%s", s); sam.update(s); } sam.solve(); }
return 0; }
|
POJ 3415 Common Substrings
题意
给两个字符串,求所有长度大于k的公共子串的对数。
分析
我们知道,SAM中某个节点代表的是长度连续的数个后缀,不妨设长度为$[L, R]$。在匹配的过程中,长度会落在$[L, R]$之间,我们只需要加上$[k, R]$的这段(可能为空)。
另外,还需要按照拓扑序逆着更新一下,这里的更新需要是需要计数的。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
| #include<set> #include<map> #include<ctime> #include<cmath> #include<queue> #include<bitset> #include<cctype> #include<cstdio> #include<vector> #include<string> #include<sstream> #include<cstdlib> #include<cstring> #include<cfloat> #include<climits> #include<iostream> #include<algorithm> #define x first #define y second #define ok cout << "ok" << endl; #define okd(d) cout << "ok " << d << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 1e5+9;
int k; char s[N], t[N];
const int CHARSET_SIZE = 59; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN], fail[MAXN], sz, last, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; int sum[MAXN], vis[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < CHARSET_SIZE; i++) ch[sz][i] = 0; return sz++; } void insert(int c) { c -= 'A'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < sz; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void solve(char *s) { topoSort(); for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; cntPos[fail[u]] += cntPos[u]; vis[i] = sum[i] = 0; } int now = 1, le = 0; ll ans = 0; for(int i = 0, up = strlen(s); i < up; i++) { int v = s[i] - 'A'; while(now != 1 && !ch[now][v]) now = fail[now], le = len[now]; if(ch[now][v]) { now = ch[now][v]; le++; vis[now]++; if(le >= k) { ans += 1LL * cntPos[now] * (le - max(len[fail[now]] + 1, k) + 1); } } } for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; if(vis[u]) { sum[fail[u]] += vis[u]; vis[fail[u]] += vis[u]; } } for(int i = 1; i < sz; i++) { if(len[i] >= k) { ans += 1LL * sum[i] * cntPos[i] * (len[i] - max(len[fail[i]] + 1, k) + 1); } } printf("%lld\n", ans); } }sam;
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} while(~scanf("%d", &k) && k) { scanf("%s%s", s, t); sam.init(); for(int i = 0, len = strlen(s); i < len; i++) { sam.insert(s[i]); } sam.solve(t); }
return 0; }
|
HDU 3518 Boring counting
题意
分析
首先给字符串建立SAM,对于各个结点,需要额外维护最长子串首先出现的位置以及最后出现的位置。最后出现的位置需要按照拓扑序逆着来更新。
然后扫一遍,根据某个结点代表的最短字符串、最长字符串与最长子串首先出现、最后出现位置,分情况讨论一下即可。
一开始没有注意到出现次数大于等于2这个条件,多想了一下。假如要求出现次数大于等于k的话,那么我们可能需要维护的是各个节点的最长子串出现的具体位置,这可能需要就需要用到LCT来维护了。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; #define okd(d) cout << "ok " << d << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 1e5+9;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN], fail[MAXN], sz, last, cntPos[MAXN], l[MAXN], r[MAXN]; int tong[MAXN], topo[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); memset(l, -1, sizeof l); memset(r, -1, sizeof r); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < CHARSET_SIZE; i++) ch[sz][i] = 0; return sz++; } void insert(int c, int po) { c -= 'a'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; l[u] = r[u] = po; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; l[n] = r[n] = l[o]; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < sz; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void solve() { topoSort(); for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; r[fail[u]] = max(r[fail[u]], r[u]); } ll ans = 0; for(int i = 1; i < sz; i++) { if(len[i] <= r[i] - l[i]) { ans += len[i] - len[fail[i]]; } else { ans += max(0, r[i] - l[i] - len[fail[i]]); } } printf("%lld\n", ans); } }sam;
char s[N];
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} while(~scanf("%s", s) && s[0] != '#') { sam.init(); for(int i = 0, up = strlen(s); i < up; i++) { sam.insert(s[i], i); } sam.solve(); }
return 0; }
|
HDU 4622 Reincarnation
题意
给一个字符串,然后有$Q$次询问,求该字符串$[L, R]$本质不同的字符串个数。
分析
首先建立SAM,然后对于每次询问,分别处理,维护的是最大匹配长度。
然后,扫一遍各个节点,累加最大匹配长度 - 最小字符串 + 1 即可。
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
| #include<bits/stdc++.h> #define x first #define y second #define ok cout << "ok" << endl; #define okd(d) cout << "ok " << d << endl; using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector<int> vi; typedef pair<int, int> pii; typedef pair<ll, ll> pll; const long double PI = acos(-1.0); const int INF = 0x3f3f3f3f; const ll LINF = 9223372036854775807; const double Eps = 1e-7; const int N = 2009;
const int CHARSET_SIZE = 26; const int MAXN = N << 1;
struct SuffixAutomaton { int ch[MAXN][CHARSET_SIZE], len[MAXN], fail[MAXN], sz, last, cntPos[MAXN]; int tong[MAXN], topo[MAXN]; void init() { len[0] = 0; sz = 1; last = newnode(0); } int newnode(int le) { len[sz] = le; fail[sz] = 0; for(int i = 0; i < CHARSET_SIZE; i++) ch[sz][i] = 0; return sz++; } void insert(int c) { c -= 'a'; int v = last, u = newnode(len[v] + 1); last = u; cntPos[u] = 1; for(; v && !ch[v][c]; v = fail[v]) ch[v][c] = u; if(!v){fail[u] = 1; return;} int o = ch[v][c]; if(len[v] + 1 == len[o]) fail[u] = o; else { int n = newnode(len[v] + 1); cntPos[n] = 0; memcpy(ch[n], ch[o], sizeof(ch[0])); fail[n] = fail[o]; fail[u] = fail[o] = n; for(; ch[v][c] == o; v = fail[v]) ch[v][c] = n; } } void topoSort() { memset(tong, 0, sizeof tong); tong[0] = 1; for(int i = 1; i < sz; i++) tong[len[i]]++; for(int i = 1; i < sz; i++) tong[i] += tong[i - 1]; for(int i = sz - 1; i >= 1; i--) topo[--tong[len[i]]] = i; } void solve(char *s, int l, int r) { int now = 1, le = 0, f[MAXN]; ll ans = 0; memset(f, 0, sizeof f); for(int i = l; i <= r; i++) { int v = s[i] - 'a'; now = ch[now][v]; le++; f[now] = max(f[now], le); } for(int i = sz - 1; i >= 1; i--) { int u = topo[i]; if(f[u]) { f[fail[u]] = len[fail[u]]; } } for(int i = sz - 1; i >= 1; i--) { ans += max(0, f[i] - len[fail[i]]); } printf("%lld\n", ans); } }sam;
int n, T, l, r; char s[N];
int main(void) { if(fopen("in", "r")!=NULL) {freopen("in", "r", stdin); freopen("out", "w", stdout);} scanf("%d", &T); while(T--) { scanf("%s", s); sam.init(); for(int i = 0, len = strlen(s); i < len; i++) { sam.insert(s[i]); } sam.topoSort(); scanf("%d", &n); while(n--) { scanf("%d%d", &l, &r); sam.solve(s, l - 1, r - 1); } }
return 0; }
|
HDU 4436 str2int
留坑待填。