I would suggest you create an IEqualityComparer to encapsulate the logic to determine if two strings are equal.
For example, if you wanted to mix and match Soundex and Levenshtein distance, it might look something like this:
/// <summary>
/// Fuzzy equality comparer for company names. Two names are considered equal when,
/// after trimming and upper-casing, they are identical or their Levenshtein distance
/// is below <see cref="EditDistanceThreshold"/>. Hash codes are derived from the
/// Soundex code of the same normalized form.
/// </summary>
/// <remarks>
/// NOTE(review): two names that are within the edit-distance threshold are not
/// guaranteed to share a Soundex code, so <see cref="Equals(string, string)"/> can
/// return true for inputs whose hash codes differ. Hash-based consumers
/// (Distinct, HashSet) only call Equals on items with matching hash codes, so in
/// practice they treat names as duplicates only when BOTH the Soundex code and the
/// edit-distance check agree. Confirm that this is the intended behavior.
/// </remarks>
public class CompanyNameComparer : IEqualityComparer<string>
{
    // Arbitrary cut-off: you will need to experiment to find what works for your data.
    private const int EditDistanceThreshold = 7;

    /// <summary>
    /// Determines whether two company names should be treated as the same name.
    /// </summary>
    /// <param name="x">First name; may be null.</param>
    /// <param name="y">Second name; may be null.</param>
    /// <returns>
    /// true when both are null, when the normalized names are identical, or when
    /// their Levenshtein distance is below the threshold; otherwise false.
    /// </returns>
    public bool Equals(string x, string y)
    {
        if (x == null && y == null)
        {
            return true;
        }

        if (x == null || y == null)
        {
            return false;
        }

        var src1 = FormatString(x);
        var src2 = FormatString(y);

        // Cheap exact match first, so the edit-distance calculation is skipped when possible.
        if (src1 == src2)
        {
            return true;
        }

        var difference = CalcLevenshteinDistance(src1, src2);
        return difference < EditDistanceThreshold;
    }

    /// <summary>
    /// Normalizes a name for comparison: surrounding whitespace is removed and the
    /// text is upper-cased with the invariant culture, so the result does not
    /// depend on the current thread's culture.
    /// </summary>
    private string FormatString(string source)
    {
        return source.Trim().ToUpperInvariant();
    }

    // code taken from http://stackoverflow.com/a/9453762/1798889
    private int CalcLevenshteinDistance(string a, string b)
    {
        // code not included
    }

    /// <summary>
    /// Returns a hash code based on the Soundex code of the normalized name.
    /// </summary>
    /// <param name="obj">The name to hash; may be null.</param>
    /// <returns>0 for null; otherwise the hash of the name's Soundex code.</returns>
    public int GetHashCode(string obj)
    {
        // Equals accepts null, so hashing must not throw on it either.
        if (obj == null)
        {
            return 0;
        }

        // Hash the same normalized form that Equals compares; otherwise names that
        // differ only by case or surrounding whitespace could compare equal yet
        // land in different hash buckets.
        return Soundex(FormatString(obj)).GetHashCode();
    }

    private string Soundex(string data)
    {
        // code not included
    }
}
I didn't include all the code because that's not the main point. Only you will know whether Soundex and Levenshtein will work or whether it needs to be something else. But if you put that decision-making in its own class, then if it needs to be tweaked there is just one place that needs to change.
Then you can get a unique list with either LINQ or a HashSet. Assuming data is the name of your List<string> variable:
// One comparer instance can serve both approaches.
var comparer = new CompanyNameComparer();

// Option 1: LINQ - lazily yields the distinct names when enumerated.
var uniqueEnumerable = data.Distinct(comparer);

// Option 2: HashSet - builds the set of distinct names immediately.
var uniqueSet = new HashSet<string>(data, comparer);