1
0
Fork 0
cp-templates/string/sa.cc

60 lines
1.5 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;
const int N = 1000010;
char s[N];
// key1[i] = rk[id[i]](作为基数排序的第一关键字数组)
int n, sa[N], rk[N], oldrk[N << 1], id[N], key1[N], cnt[N], height[N];
bool cmp(int x, int y, int w) {
return oldrk[x] == oldrk[y] && oldrk[x + w] == oldrk[y + w];
}
int main() {
int i, m = 127, p, w;
scanf("%s", s + 1);
n = strlen(s + 1);
// calc sa[]
for (i = 1; i <= n; ++i) ++cnt[rk[i] = s[i]];
for (i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (i = n; i >= 1; --i) sa[cnt[rk[i]]--] = i;
for (w = 1;; w <<= 1, m = p) { // m=p 就是优化计数排序值域
for (p = 0, i = n; i > n - w; --i) id[++p] = i;
for (i = 1; i <= n; ++i)
if (sa[i] > w) id[++p] = sa[i] - w;
memset(cnt, 0, sizeof(cnt));
for (i = 1; i <= n; ++i) ++cnt[key1[i] = rk[id[i]]];
// 注意这里px[i] != i因为rk没有更新是上一轮的排名数组
for (i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (i = n; i >= 1; --i) sa[cnt[key1[i]]--] = id[i];
memcpy(oldrk + 1, rk + 1, n * sizeof(int));
for (p = 0, i = 1; i <= n; ++i)
rk[sa[i]] = cmp(sa[i], sa[i - 1], w) ? p : ++p;
if (p == n) {
break;
}
}
// calc height[]
for (i = 1, k = 0; i <= n; ++i) {
if (rk[i] == 0) continue;
if (k) --k;
while (s[i + k] == s[sa[rk[i] - 1] + k]) ++k;
height[rk[i]] = k;
}
for (i = 1; i <= n; ++i) printf("%d ", sa[i]);
return 0;
}