For performance you'll want something like the Boyer-Moore string search algorithm. While it's designed for strings, it should work just as well on byte arrays, and is much more performant than a brute-force solution.
The Wikipedia article provides several implementations, including one in Java and one in C, so creating a C# implementation should be fairly painless.
As it turns out, translating the Wikipedia article's Java implementation of Boyer-Moore to C# (and char
to byte
) was painless indeed. Here it is:
public static class BoyerMoore
{
public static int IndexOf(byte[] haystack, byte[] needle)
{
if (needle.Length == 0)
{
return 0;
}
int[] charTable = MakeCharTable(needle);
int[] offsetTable = MakeOffsetTable(needle);
for (int i = needle.Length - 1; i < haystack.Length;)
{
int j;
for (j = needle.Length - 1; needle[j] == haystack[i]; --i, --j)
{
if (j == 0)
{
return i;
}
}
i += Math.Max(offsetTable[needle.Length - 1 - j], charTable[haystack[i]]);
}
return -1;
}
private static int[] MakeCharTable(byte[] needle)
{
const int ALPHABET_SIZE = 256;
int[] table = new int[ALPHABET_SIZE];
for (int i = 0; i < table.Length; ++i)
{
table[i] = needle.Length;
}
for (int i = 0; i < needle.Length - 1; ++i)
{
table[needle[i]] = needle.Length - 1 - i;
}
return table;
}
private static int[] MakeOffsetTable(byte[] needle)
{
int[] table = new int[needle.Length];
int lastPrefixPosition = needle.Length;
for (int i = needle.Length - 1; i >= 0; --i)
{
if (IsPrefix(needle, i + 1))
{
lastPrefixPosition = i + 1;
}
table[needle.Length - 1 - i] = lastPrefixPosition - i + needle.Length - 1;
}
for (int i = 0; i < needle.Length - 1; ++i)
{
int slen = SuffixLength(needle, i);
table[slen] = needle.Length - 1 - i + slen;
}
return table;
}
private static bool IsPrefix(byte[] needle, int p)
{
for (int i = p, j = 0; i < needle.Length; ++i, ++j)
{
if (needle[i] != needle[j])
{
return false;
}
}
return true;
}
private static int SuffixLength(byte[] needle, int p)
{
int len = 0;
for (int i = p, j = needle.Length - 1; i >= 0 && needle[i] == needle[j]; --i, --j)
{
len += 1;
}
return len;
}
}
The algorithm spends a linear bit of time at the start, building its tables; from then it's blazingly fast.