KMP preprocesses the pattern into an LPS (longest proper prefix which is also suffix) table. On mismatch, it reuses previous match information instead of restarting from scratch, guaranteeing linear O(n + m) time.
vector<int> buildLps(const string& p) {
vector<int> lps(p.size(), 0);
for (int i = 1, len = 0; i < (int)p.size();) {
if (p[i] == p[len]) lps[i++] = ++len;
else if (len) len = lps[len - 1];
else lps[i++] = 0;
}
return lps;
}
vector<int> kmp(string t, string p) {
auto lps = buildLps(p);
vector<int> res;
for (int i = 0, j = 0; i < (int)t.size();) {
if (t[i] == p[j]) { i++; j++; }
if (j == (int)p.size()) { res.push_back(i - j); j = lps[j - 1]; }
else if (i < (int)t.size() && t[i] != p[j]) {
if (j) j = lps[j - 1];
else i++;
}
}
return res;
}LPS tells where to resume in pattern after mismatch.
Text pointer only moves forward, ensuring linear time.
Reliable worst-case performance for large text streams.
Quiz coming soon for this algorithm.