0

I have this Sunday Algorithm, which checks some occurrences in some text.

class Program
{
    static int alphabet = 512;
    static int[] table = new int[alphabet];

    static int[] occurence(string pattern)
    {
        for(char a = (char)0; a<(char)alphabet;a++)
        {
            table[(int)a] = -1;
        }

        for(int i = 0; i< pattern.Length;i++)
        {
            char a = pattern[i];
            table[(int)a] = i;
        }
        return table;
    }

    public static int Sunday(string text, string pattern)
    {
        Stopwatch timer = new Stopwatch();
        timer.Start();
        int k = 0;
        int i = 0;
        int[] table = new int[pattern.Length];
        table = occurence(pattern);
        while(i <= text.Length - pattern.Length)
        {
            int j = 0;
            while(j<pattern.Length && text[i+j] == pattern[j])
            {
                j++;
            }
            if(j==pattern.Length)
            {
                k++;
            }
            i += pattern.Length;
            if(i<text.Length)
            {
                i -= table[(int)text[i]];
            }
        }
        timer.Stop();
        Console.WriteLine(timer.Elapsed);
        return k;
    }

    static void Main(string[] args)
    {
        string text = File.ReadAllText(@"C:\Users\Bacho\Desktop\Studies\Advanced Algorithms\Test Patterns\Book1.txt");
        string pattern = "frankenstein";
        Console.WriteLine(Sunday(text, pattern));
    }
}

This is my algorithm, which works well on small input of text. In the code, I try to read a text file which consists of roughly 80 000 words and 450 000 characters. I get this exception:

Unhandled Exception: System.IndexOutOfRangeException: Index was outside the bounds of the array. at Sunday1.Program.Sunday(String text, String pattern) in C:\Users\Bacho\Desktop\Studies\Advanced Algorithms\Sunday1\Sunday1\Program.cs:line 55 at Sunday1.Program.Main(String[] args) in C:\Users\Bacho\Desktop\Studies\Advanced Algorithms\Sunday1\Sunday1\Program.cs:line 68

..at the following line:

i -= table[(int)text[i]];

Is it because it can not fit in string or is it something else?

Changing the algorithm to read and check line by line may be a solution, but is there a way to avoid it?

  • When you post a debugging question that includes an exception, you should also mention the line that causes the exception to occur. Based on the line number, I added that line to your post. Please feel free to correct that if I got the wrong line. – 41686d6564 stands w. Palestine Apr 06 '19 at 23:26

1 Answers1

1

A char has a length of two bytes or 16 bits not 9. So I guess your alphabet should therefore probably be 65336 or simply char.MaxValue instead of 512.

Also there's no need of the static fields. You can make them local to occurence(). And in Sunday, you don't need to initialize table with new int[pattern.Length], you can directly use occurence(pattern).

class Program
{
    static int[] occurence(string pattern)
    {
        int[] table = new int[char.MaxValue + 1];

        for (int a = 0; a < char.MaxValue + 1; a++)
        {
            table[a] = -1;
        }

        for (int i = 0; i < pattern.Length; i++)
        {
            char a = pattern[i];
            table[(int)a] = i;
        }
        return table;
    }

    public static int Sunday(string text, string pattern)
    {
        Stopwatch timer = new Stopwatch();
        timer.Start();
        int k = 0;
        int i = 0;
        int[] table = occurence(pattern);
        while (i <= text.Length - pattern.Length)
        {
            int j = 0;
            while (j < pattern.Length && text[i + j] == pattern[j])
            {
                j++;
            }
            if (j == pattern.Length)
            {
                k++;
            }
            i += pattern.Length;
            if (i < text.Length)
            {
                i -= table[(int)text[i]];
            }
        }
        timer.Stop();
        Console.WriteLine(timer.Elapsed);
        return k;
    }

    static void Main(string[] args)
    {
        string text = File.ReadAllText(@"C:\Users\Bacho\Desktop\Studies\Advanced Algorithms\Test Patterns\Book1.txt");
        string pattern = "frankenstein";
        Console.WriteLine(Sunday(text, pattern));
    }
}
sticky bit
  • 36,626
  • 12
  • 31
  • 42