2

Possible Duplicate:
byte[] array pattern search

Hello,

What's the best way to check whether one byte[] is contained in another byte[]?

For example

byte[] first = new byte[] { 0x01, 0x02, 0x03, 0x04, 0x05 };
byte[] second = new byte[] { 0x01, 0x02 };
byte[] third = new byte[] { 0x01, 0x03 };

The method would return:

first.Contains(second); // true
first.Contains(third); // false
second.Contains(third); // false

Thanks!

Community
  • 1
  • 1
Mike
  • 21
  • 1
  • 2

1 Answer

3

You could use Jb's Locate method and return early on the first match.

Eg:

/// <summary>
/// Naive (brute-force) sub-array search helpers for <c>byte[]</c>.
/// </summary>
static class ByteArrayRocks
{
    /// <summary>
    /// Determines whether <paramref name="candidate"/> occurs as a contiguous
    /// run of bytes somewhere inside <paramref name="self"/>.
    /// </summary>
    /// <param name="self">The array to search in.</param>
    /// <param name="candidate">The byte sequence to look for.</param>
    /// <returns>
    /// <c>true</c> if the sequence is found; <c>false</c> if it is not found, or if
    /// either array is null or empty, or the candidate is longer than the array.
    /// </returns>
    public static bool Contains(this byte[] self, byte[] candidate)
    {
        if (IsEmptyLocate(self, candidate))
        {
            return false;
        }

        // A match can only start where the whole candidate still fits, so stop
        // there instead of probing (and rejecting) every remaining offset.
        // IsEmptyLocate guarantees candidate.Length <= self.Length, so this is >= 0.
        int lastStart = self.Length - candidate.Length;

        for (int i = 0; i <= lastStart; i++)
        {
            if (IsMatch(self, i, candidate))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> matches <paramref name="array"/>
    /// byte-for-byte starting at <paramref name="position"/>.
    /// </summary>
    static bool IsMatch(byte[] array, int position, byte[] candidate)
    {
        // Not enough bytes left in the array to hold the candidate.
        if (candidate.Length > (array.Length - position))
        {
            return false;
        }

        for (int i = 0; i < candidate.Length; i++)
        {
            if (array[position + i] != candidate[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns <c>true</c> when a search cannot possibly succeed: either array is
    /// null or empty, or the candidate is longer than the array.
    /// </summary>
    static bool IsEmptyLocate(byte[] array, byte[] candidate)
    {
        return array == null
                || candidate == null
                || array.Length == 0
                || candidate.Length == 0
                || candidate.Length > array.Length;
    }
}

class Program
{
    // Demo entry point: searches a sample buffer for a pattern and prints the result.
    static void Main()
    {
        // The needle ends in 11, a byte that never appears in the sample buffer below.
        byte[] pattern = { 12, 3, 5, 76, 8, 0, 6, 125, 11 };

        byte[] data =
        {
            23, 36, 43, 76, 125, 56, 34, 234,
            12, 3, 5, 76, 8, 0, 6, 125,
            234, 56, 211, 122, 22, 4, 7, 89, 76, 64,
            12, 3, 5, 76, 8, 0, 6, 125
        };

        bool found = data.Contains(pattern);
        Console.WriteLine(found);

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }
}

This would be much less efficient than Boyer-Moore for certain arrays, since Boyer-Moore is able to skip through the array much faster when there is a mismatch. There are C# implementations of this, amongst many other algorithms, here.

This is an adaptation based on that code and the Wikipedia implementation of the Horspool algorithm.

/// <summary>
/// Boyer-Moore-Horspool sub-array search for <c>byte[]</c>.
/// </summary>
static class Horspool
{
    /// <summary>
    /// Builds the bad-character shift table for <paramref name="needle"/>.
    /// </summary>
    /// <param name="needle">The non-empty byte sequence being searched for.</param>
    /// <returns>
    /// A 256-entry table indexed by byte value. Each entry is how far the search
    /// window may advance when that byte sits under the needle's last position:
    /// the distance from the byte's last occurrence in the needle (ignoring the
    /// final byte) to the end of the needle, or the full needle length if the
    /// byte does not occur there.
    /// </returns>
    private static int[] BuildBadSkipArray(byte[] needle)
    {
        const int TableSize = 256; // one entry per possible byte value

        int[] skip = new int[TableSize];
        int needleLength = needle.Length;

        // Default: the byte is not in the needle, so the whole window can be skipped.
        for (int c = 0; c < TableSize; c++)
        {
            skip[c] = needleLength;
        }

        int last = needleLength - 1;

        // The final needle byte is deliberately excluded; later occurrences
        // overwrite earlier ones so the smallest safe shift wins.
        for (int scan = 0; scan < last; scan++)
        {
            skip[needle[scan]] = last - scan;
        }

        return skip;
    }

    /// <summary>
    /// Determines whether <paramref name="needle"/> occurs as a contiguous run of
    /// bytes inside <paramref name="haystack"/>, using the Horspool algorithm.
    /// </summary>
    /// <param name="haystack">The array to search in.</param>
    /// <param name="needle">The byte sequence to look for.</param>
    /// <returns>
    /// <c>true</c> if the sequence is found; <c>false</c> if it is not found, or if
    /// either array is null or empty, or the needle is longer than the haystack.
    /// </returns>
    public static bool ContainsHorspool(this byte[] haystack, byte[] needle)
    {
        // Degenerate inputs can never match; bail out before indexing anything
        // (an empty needle would otherwise read haystack[-1]).
        if (haystack == null
            || needle == null
            || haystack.Length == 0
            || needle.Length == 0
            || needle.Length > haystack.Length)
        {
            return false;
        }

        int hlen = haystack.Length;
        int last = needle.Length - 1;
        int[] badCharSkip = BuildBadSkipArray(needle);

        int offset = 0;

        while (offset + last < hlen)
        {
            // Compare the window against the needle from right to left.
            for (int scan = last; haystack[scan + offset] == needle[scan]; scan--)
            {
                if (scan == 0)
                {
                    return true;
                }
            }

            // Horspool always shifts by the byte under the needle's LAST position,
            // not by the byte where the mismatch happened. Using the mismatched
            // byte can jump past a real match.
            offset += badCharSkip[haystack[offset + last]];
        }

        return false;
    }
}
Community
  • 1
  • 1
Sam Saffron
  • 128,308
  • 78
  • 326
  • 506
  • 1
    +1 for suggesting Boyer-Moore. Any string search algorithm should be applicable here. – Ben Voigt Feb 21 '11 at 06:02
  • Second listing above wasn't working for me and turned out to be a typo. `offset+= line` should have read `offset += badCharSkip[haystack[last + offset]];` – andrewf Jan 26 '12 at 18:28