Using some extension methods on `String`, you can create an `IComparer<IEnumerable<string>>` for sorting `IEnumerable<string>` such that non-numeric strings sort normally, and numeric strings sort by length, then by string value. Then you can split the addresses into words, and sort using the new comparer.
/// <summary>
/// String helpers for splitting text into words and recognizing all-digit words.
/// </summary>
public static class StringExt {
    // Matches maximal runs of word characters (\w); created once and shared by every call.
    static readonly Regex wordRE = new(@"\w+", RegexOptions.Compiled | RegexOptions.NonBacktracking);

    /// <summary>
    /// Splits <paramref name="s"/> into its words (runs of <c>\w</c> characters), in order of appearance.
    /// </summary>
    /// <param name="s">The text to split.</param>
    /// <returns>The words of <paramref name="s"/>; empty when there are none.</returns>
    /// <remarks>
    /// The words are materialized into an array. When the result is used as a sort key it is
    /// enumerated once per comparison; a lazy pipeline would re-read <c>Match.Value</c> (and so
    /// produce the word strings again) on every one of those enumerations.
    /// </remarks>
    public static IEnumerable<string> Words(this string s) => s.Matches(wordRE).ToArray();

    /// <summary>
    /// Lazily yields the text of every match of <paramref name="re"/> in <paramref name="s"/>.
    /// </summary>
    /// <param name="s">The text to search.</param>
    /// <param name="re">The pattern to match.</param>
    /// <returns>The value of each match, in the order the regex reports them.</returns>
    public static IEnumerable<string> Matches(this string s, Regex re)
        => re.Matches(s).Cast<Match>().Select(m => m.Value);

    /// <summary>
    /// Tests whether every character of <paramref name="s"/> satisfies <see cref="char.IsDigit(char)"/>.
    /// </summary>
    /// <param name="s">The text to test.</param>
    /// <returns>
    /// <c>true</c> when no character fails the digit test; this is vacuously <c>true</c> for an empty string.
    /// </returns>
    public static bool IsAllDigits(this string s) => s.All(char.IsDigit); // faster than Regex
}
/// <summary>
/// Compares sequences of words element by element. Two all-digit words are ordered by length
/// first and then by the underlying <see cref="StringComparer"/>; every other pair of words is
/// ordered by the underlying <see cref="StringComparer"/> alone.
/// </summary>
public class WordsAndNumbersComparer : IComparer<IEnumerable<string>> {
    // Comparer applied to individual words.
    readonly StringComparer cmp;

    /// <summary>Creates a comparer that orders individual words with <paramref name="c"/>.</summary>
    /// <param name="c">The comparer applied to each pair of words.</param>
    protected WordsAndNumbersComparer(StringComparer c) => cmp = c;

    /// <summary>
    /// Creates a comparer whose word comparisons follow the rules of <paramref name="ci"/>.
    /// </summary>
    /// <param name="ci">The culture whose comparison rules are used.</param>
    /// <param name="ignoreCase"><c>true</c> to compare words case-insensitively.</param>
    /// <returns>A new comparer wrapping <see cref="StringComparer.Create(CultureInfo, bool)"/>.</returns>
    // Static, like StringComparer.Create: the constructor is protected, so an instance-level
    // factory could only be reached through an already-existing comparer.
    public static WordsAndNumbersComparer Create(CultureInfo ci, bool ignoreCase)
        => new WordsAndNumbersComparer(StringComparer.Create(ci, ignoreCase));

    // The two current-culture comparers are deliberately NOT cached: caching would pin the
    // StringComparer obtained on first access, so a later or per-thread culture change would
    // be ignored.

    /// <summary>Gets a case-sensitive comparer built from <see cref="StringComparer.CurrentCulture"/> at the time of the call.</summary>
    public static WordsAndNumbersComparer CurrentCulture
        => new WordsAndNumbersComparer(StringComparer.CurrentCulture);

    /// <summary>Gets a case-insensitive comparer built from <see cref="StringComparer.CurrentCultureIgnoreCase"/> at the time of the call.</summary>
    public static WordsAndNumbersComparer CurrentCultureIgnoreCase
        => new WordsAndNumbersComparer(StringComparer.CurrentCultureIgnoreCase);

    /// <summary>Gets the shared comparer based on <see cref="StringComparer.InvariantCulture"/>.</summary>
    public static WordsAndNumbersComparer InvariantCulture { get; }
        = new WordsAndNumbersComparer(StringComparer.InvariantCulture);

    /// <summary>Gets the shared comparer based on <see cref="StringComparer.InvariantCultureIgnoreCase"/>.</summary>
    public static WordsAndNumbersComparer InvariantCultureIgnoreCase { get; }
        = new WordsAndNumbersComparer(StringComparer.InvariantCultureIgnoreCase);

    /// <summary>Gets the shared comparer based on <see cref="StringComparer.Ordinal"/>.</summary>
    public static WordsAndNumbersComparer Ordinal { get; }
        = new WordsAndNumbersComparer(StringComparer.Ordinal);

    /// <summary>Gets the shared comparer based on <see cref="StringComparer.OrdinalIgnoreCase"/>.</summary>
    public static WordsAndNumbersComparer OrdinalIgnoreCase { get; }
        = new WordsAndNumbersComparer(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Compares two word sequences position by position and returns the first non-zero result.
    /// </summary>
    /// <param name="firstWords">The left-hand word sequence.</param>
    /// <param name="secondWords">The right-hand word sequence.</param>
    /// <returns>
    /// A negative value when <paramref name="firstWords"/> sorts first, a positive value when
    /// <paramref name="secondWords"/> sorts first, and zero when both have the same length and
    /// every pair of words compares equal. A <c>null</c> sequence sorts before a non-null one,
    /// and a sequence that is a strict prefix of the other sorts before it.
    /// </returns>
    public int Compare(IEnumerable<string> firstWords, IEnumerable<string> secondWords) {
        if (ReferenceEquals(firstWords, secondWords))
            return 0;
        // IComparer<T> convention: null is less than any non-null reference.
        if (firstWords is null)
            return -1;
        if (secondWords is null)
            return 1;

        using var first = firstWords.GetEnumerator();
        using var second = secondWords.GetEnumerator();
        while (true) {
            var hasFirst = first.MoveNext();
            var hasSecond = second.MoveNext();
            // One (or both) ran out with every shared position equal: the shorter sequence
            // sorts first (false < true), and two sequences of equal length are equal.
            // Stopping at the shorter one and reporting 0 would make "1 Main" equal to both
            // "1 Main St" and "1 Main Ave", which are not equal to each other.
            if (!hasFirst || !hasSecond)
                return hasFirst.CompareTo(hasSecond);

            var left = first.Current;
            var right = second.Current;
            var result = 0;
            // For two numbers, more digits orders later, so "9" comes before "10".
            if (left.IsAllDigits() && right.IsAllDigits())
                result = left.Length.CompareTo(right.Length);
            if (result == 0)
                result = cmp.Compare(left, right);
            if (result != 0)
                return result;
        }
    }
}
With these defined, you can use them like so:
// Sort key is each element's word list (s.Words()), compared case-insensitively with the current culture's rules.
// NOTE(review): src is presumably the sequence of address strings — it is not defined in this snippet.
var ans = src.OrderBy(s => s.Words(), WordsAndNumbersComparer.CurrentCultureIgnoreCase);
which sorts your example street addresses as expected.
NOTE: Using the .NET 7 `Regex` enhancements around `ReadOnlySpan<char>`, you could probably write a version of `WordsAndNumbersComparer` that takes a `string` and uses the enhancements internally to reduce object (string) creation for the comparison. But because `OrderBy` caches keys, pulling the invariant key computation out may be more efficient than constantly slicing the address when sorting.