Are you looking for something like longest common substring?
There are fast, but quite complicated algorithms which solve the task by building and using suffix trees. They have O(n)
time for constant size alphabet, and O(n log(n))
time in the worst case, where n
is the maximal length of strings.
Below is a possible C# implementation (from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring). It is not optimal, but could be sufficient in our case.
public int LongestCommonSubstring(string str1, string str2, out string sequence)
{
sequence = string.Empty;
if (String.IsNullOrEmpty(str1) || String.IsNullOrEmpty(str2))
return 0;
int[,] num = new int[str1.Length, str2.Length];
int maxlen = 0;
int lastSubsBegin = 0;
StringBuilder sequenceBuilder = new StringBuilder();
for (int i = 0; i < str1.Length; i++)
{
for (int j = 0; j < str2.Length; j++)
{
if (str1[i] != str2[j])
num[i, j] = 0;
else
{
if ((i == 0) || (j == 0))
num[i, j] = 1;
else
num[i, j] = 1 + num[i - 1, j - 1];
if (num[i, j] > maxlen)
{
maxlen = num[i, j];
int thisSubsBegin = i - num[i, j] + 1;
if (lastSubsBegin == thisSubsBegin)
{//if the current LCS is the same as the last time this block ran
sequenceBuilder.Append(str1[i]);
}
else //this block resets the string builder if a different LCS is found
{
lastSubsBegin = thisSubsBegin;
sequenceBuilder.Length = 0; //clear it
sequenceBuilder.Append(str1.Substring(lastSubsBegin, (i + 1) - lastSubsBegin));
}
}
}
}
}
sequence = sequenceBuilder.ToString();
return maxlen;
}