0

This question is in reference to the LightOj problem (1255 - Substring Frequency): http://lightoj.com/volume_showproblem.php?problem=1255 [You have to login to view the problem]

Basically, the problem is a substring matching and counting problem.

Here is the KMP code:

#include <iostream>
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <vector>
#include <string>
#define MOD 1000000007
#define ull unsigned long long int
#define dll int
#define dl long int
#define ul unsigned long int
#define gc getchar_unlocked
#define cn int
using namespace std;
template <class T> void scanNum(T &x)
{
    register T c = gc();
    x = 0;
    int neg = 0;
    for(;((c<48 || c>57) && c != '-');c = gc());
    if(c=='-') {neg=1;c=gc();}
    for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
    if(neg) x=-x;
}

inline void scanString(string& str)
{
    register char c = 0;
    register int i = 0;
    while (c < 33)
    c = getchar_unlocked();
    while (c != '\n')
    {
        str =str + c;
        c = getchar_unlocked();
    }
    str = str + '\0';
}

class KMP
{
string txt, pat;
dll M,N,c;
dll *lps;
public:
KMP(string t,string p)
{
    txt=t;
    pat=p;
    N=txt.length()-1;
    M=pat.length()-1;
    c=0;
}
void preprocess()
{

    dll len=0,i=1;
    lps=(dll *)malloc(M*sizeof(dll));
    lps[0]=0;

    while(i<M)
    {
        if(pat[i]==pat[len])
        {
            lps[i++]=++len; 
        }
        else
        {
            if(len!=0)
            {
                len=lps[len-1];
            }
            else
            {
                lps[i++]=0;
            }
        }
    }
}

dll KMPalgo()
{
    preprocess();
    dll j=0,i=0;
    while(i<N)
    {
        if(pat[j]==txt[i])
        {
            i++;
            j++;
        }
        if(j==M)
        {
            c++;
            j=lps[j-1];
        }
        else if((i<N) && (pat[j]!=txt[i]))
        {
            if(j!=0)
                j=lps[j-1];
            else
                i++;
        }
    }
    free(lps);
    return c;
}
};


int main()
{
cn t;
scanNum<cn>(t);
for(cn i=1;i<=t;i++)
{
    string txt,pat;
    scanString(txt);
    scanString(pat);
    KMP strMatch(txt,pat);
    cn v = strMatch.KMPalgo();
    printf("Case %d: %d\n",i,v);
        //nl;
}
return 0;
}

Here is the Z-algorithm code:

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <algorithm>
#define ull unsigned long long int
#define dll long long int
#define dl long int
#define ul unsigned long int
#define gc getchar_unlocked
#define cn dll
#define nl printf("\n");
using namespace std;

template <class T> void scanNum(T &x)
{
register T c = gc();
x = 0;
int neg = 0;
for(;((c<48 || c>57) && c != '-');c = gc());
if(c=='-') {neg=1;c=gc();}
for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
if(neg) x=-x;
}
inline void scanString(string& str)
{
register char c = 0;
register int i = 0;
while (c < 33)
c = getchar_unlocked();
while (c != '\n')
{
    str =str + c;
    c = getchar_unlocked();
}
str = str + '\0';
}
class ZAlgo
{
string txt,pat,fin;
dll *z,patl,txtl,n;
public:
ZAlgo(string txt,string pat)
{
    this->txt=txt;
    this->pat=pat;
    fin=pat+"@"+txt;
    patl=pat.length()-1;//cout<<patl<<endl;
    txtl=txt.length()-1;//cout<<txtl<<endl;
    n=patl+txtl+1;//cout<<fin<<" "<<fin.length()-1<<endl;
}
dll run()
{
    dll c=0;
    z=(dll *)malloc((n+1)*sizeof(dll));
    dll r=0,l=0;
    z[0]=0;
    for(dll k=1;k<n+1;k++)
    {
        if(k>r)
        {
            l=r=k;
            while((r<n+1)&&fin[r]==fin[r-l])
            {
                r++;
            }
            z[k]=(r--)-l;
        }
        else
        {
            dll k1=k-l;
            if(z[k1]<r-k+1)
            {
                z[k]=z[k1];
            }
            else
            {
                l=k;
                while((r<n+1)&&fin[r]==fin[r-l])
                {
                    r++;
                }
                z[k]=(r--)-l;
            }
        }
        if(z[k]==patl)
            c++;
    }
    return c;
}
};
int main()
{
dll t;
scanNum<dll>(t);
for (dll i=1;i<=t;i++)
{
    string txt;
    string pat;
    scanString(txt);
    scanString(pat);
    ZAlgo o(txt,pat);
    printf("Case %lld: %lld\n",i,o.run());

}
return 0;
}

Both the solutions are giving TLE in the website.

Aho-Corsick algo can be used when there is a dictionary of words. Boyer-Moore has complexity of O(mn) in worst case i.e., case of something like txt=aaaaaaaaa and pat=aa which is present in the test cases of the problem.

Is there a better algorithm which can solve the problem? I couldn't find any proper solution, so I had to post it here.

Tarun Maganti
  • 3,076
  • 2
  • 35
  • 64
  • 1
    why do you use `register` ? – 463035818_is_not_an_ai Jun 17 '16 at 20:45
  • So that CPU can get access to the variable faster and process is faster. I was desperate on submitting the question, I was trying to make the input faster in anyway I can.As far as I know, O(M+N), respective lengths of text and pattern, is the fastest way because it the minimum number of times to evaluate every character. The scanString and scanNum are small functions and last for very less time hence using register, I though was justified. – Tarun Maganti Jun 17 '16 at 20:57
  • see [here](http://stackoverflow.com/questions/3207018/register-keyword-in-c) – 463035818_is_not_an_ai Jun 17 '16 at 20:57
  • http://programmers.stackexchange.com/questions/183725/which-string-search-algorithm-is-actually-the-fastest – Arunmu Jun 17 '16 at 20:59
  • Is it any better to use a static array instead of calling malloc and free for each new test case? – Peter de Rivaz Jun 17 '16 at 21:10
  • Well, since we're talking C++ here, just about anything is better than using `malloc` and `free`. – user4581301 Jun 17 '16 at 21:31
  • @tobi303 on `register`, back when KMP showed up, shortly after the invention of fire, `register` made a lot of sense in a C implementation. Now, not so much, but it's harmless and why change code that works? Edit, mind you that leads to the question, "Does OP's code work?" Dunno. Looking at that code gives me a headache, so I can't be bothered to find out. – user4581301 Jun 17 '16 at 21:44

0 Answers0