This question is in reference to the LightOj problem (1255 - Substring Frequency): http://lightoj.com/volume_showproblem.php?problem=1255 [You have to login to view the problem]
Basically, the problem is a substring matching and counting problem.
Here is the KMP code:
#include <iostream>
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <vector>
#include <string>
#define MOD 1000000007
#define ull unsigned long long int
#define dll int
#define dl long int
#define ul unsigned long int
#define gc getchar_unlocked
#define cn int
using namespace std;
template <class T> void scanNum(T &x)
{
register T c = gc();
x = 0;
int neg = 0;
for(;((c<48 || c>57) && c != '-');c = gc());
if(c=='-') {neg=1;c=gc();}
for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
if(neg) x=-x;
}
inline void scanString(string& str)
{
register char c = 0;
register int i = 0;
while (c < 33)
c = getchar_unlocked();
while (c != '\n')
{
str =str + c;
c = getchar_unlocked();
}
str = str + '\0';
}
class KMP
{
string txt, pat;
dll M,N,c;
dll *lps;
public:
KMP(string t,string p)
{
txt=t;
pat=p;
N=txt.length()-1;
M=pat.length()-1;
c=0;
}
void preprocess()
{
dll len=0,i=1;
lps=(dll *)malloc(M*sizeof(dll));
lps[0]=0;
while(i<M)
{
if(pat[i]==pat[len])
{
lps[i++]=++len;
}
else
{
if(len!=0)
{
len=lps[len-1];
}
else
{
lps[i++]=0;
}
}
}
}
dll KMPalgo()
{
preprocess();
dll j=0,i=0;
while(i<N)
{
if(pat[j]==txt[i])
{
i++;
j++;
}
if(j==M)
{
c++;
j=lps[j-1];
}
else if((i<N) && (pat[j]!=txt[i]))
{
if(j!=0)
j=lps[j-1];
else
i++;
}
}
free(lps);
return c;
}
};
int main()
{
cn t;
scanNum<cn>(t);
for(cn i=1;i<=t;i++)
{
string txt,pat;
scanString(txt);
scanString(pat);
KMP strMatch(txt,pat);
cn v = strMatch.KMPalgo();
printf("Case %d: %d\n",i,v);
//nl;
}
return 0;
}
Here is the Z-algorithm code:
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <algorithm>
#define ull unsigned long long int
#define dll long long int
#define dl long int
#define ul unsigned long int
#define gc getchar_unlocked
#define cn dll
#define nl printf("\n");
using namespace std;
template <class T> void scanNum(T &x)
{
register T c = gc();
x = 0;
int neg = 0;
for(;((c<48 || c>57) && c != '-');c = gc());
if(c=='-') {neg=1;c=gc();}
for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
if(neg) x=-x;
}
inline void scanString(string& str)
{
register char c = 0;
register int i = 0;
while (c < 33)
c = getchar_unlocked();
while (c != '\n')
{
str =str + c;
c = getchar_unlocked();
}
str = str + '\0';
}
class ZAlgo
{
string txt,pat,fin;
dll *z,patl,txtl,n;
public:
ZAlgo(string txt,string pat)
{
this->txt=txt;
this->pat=pat;
fin=pat+"@"+txt;
patl=pat.length()-1;//cout<<patl<<endl;
txtl=txt.length()-1;//cout<<txtl<<endl;
n=patl+txtl+1;//cout<<fin<<" "<<fin.length()-1<<endl;
}
dll run()
{
dll c=0;
z=(dll *)malloc((n+1)*sizeof(dll));
dll r=0,l=0;
z[0]=0;
for(dll k=1;k<n+1;k++)
{
if(k>r)
{
l=r=k;
while((r<n+1)&&fin[r]==fin[r-l])
{
r++;
}
z[k]=(r--)-l;
}
else
{
dll k1=k-l;
if(z[k1]<r-k+1)
{
z[k]=z[k1];
}
else
{
l=k;
while((r<n+1)&&fin[r]==fin[r-l])
{
r++;
}
z[k]=(r--)-l;
}
}
if(z[k]==patl)
c++;
}
return c;
}
};
int main()
{
dll t;
scanNum<dll>(t);
for (dll i=1;i<=t;i++)
{
string txt;
string pat;
scanString(txt);
scanString(pat);
ZAlgo o(txt,pat);
printf("Case %lld: %lld\n",i,o.run());
}
return 0;
}
Both the solutions are giving TLE in the website.
Aho-Corsick algo can be used when there is a dictionary of words. Boyer-Moore has complexity of O(mn) in worst case i.e., case of something like txt=aaaaaaaaa and pat=aa which is present in the test cases of the problem.
Is there a better algorithm which can solve the problem? I couldn't find any proper solution, so I had to post it here.