This is the best you can get. It is neither a maintainable solution nor a "fast" one (GetDomain.GetDomainFromUrl
should be optimized).
- Use GetDomain.GetDomainFromUrl
- In TldPatterns.EXACT, add "co.uk" (I don't know why it isn't there in the first place)
- Some other minor string manipulations
This is what it should look like:
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
// Holder for the table of hostnames that are treated as TLDs.
// Only the beginning of the class is shown in this excerpt.
class TldPatterns
{
// Private constructor: the class only exposes static data.
private TldPatterns()
{
// Prevent instantiation.
}
/**
* If a hostname is contained in this set, it is a TLD.
* Entries are written without a leading dot (e.g. "co.uk").
*/
static public string[] EXACT = new string[] {
"gov.uk",
"mil.uk",
// "co.uk" is the entry this answer asks to add to the list.
"co.uk",
// ... (remaining entries and the closing of the array/class are elided in this excerpt)
/// <summary>
/// Console demo: normalises a handful of sample URLs, resolves each one to its
/// registrable domain via <c>GetDomain.GetDomainFromUrl</c>, and prints the
/// original input, the normalised URL, the domain and the domain's first label.
/// </summary>
public class Program
{
    // Matches a leading "scheme://" prefix. Built once instead of on every loop iteration.
    private static readonly Regex SchemePrefix = new Regex("^\\w+://");

    /// <summary>
    /// Entry point. Iterates over the hard-coded sample URLs and writes the
    /// results to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string[] urls = new[] {"www.google.com", "http://www.google.co.uk/path1/path2 ", "http://google.co.uk/path1/path2 ",
            "http://google.com", "http://google.co.in"};

        foreach (var item in urls)
        {
            // Inputs without a scheme get "http://" prepended before being handed to the resolver.
            string url = SchemePrefix.IsMatch(item) ? item : "http://" + item;

            var domain = GetDomain.GetDomainFromUrl(url);

            Console.WriteLine("Original : " + item);
            Console.WriteLine("URL : " + url);
            Console.WriteLine("Domain : " + domain);
            Console.WriteLine("Domain Part : " + GetDomainPart(domain));
            Console.WriteLine();
        }
    }

    /// <summary>
    /// Returns the part of <paramref name="domain"/> that precedes its first dot.
    /// </summary>
    /// <param name="domain">
    /// Domain text as returned by the resolver. NOTE(review): assumed to be a
    /// <c>string</c>; confirm against the declaration of <c>GetDomainFromUrl</c>.
    /// </param>
    /// <returns>
    /// The text before the first '.', the whole value when it contains no dot,
    /// or an empty string when the value is null or empty.
    /// </returns>
    private static string GetDomainPart(string domain)
    {
        if (string.IsNullOrEmpty(domain))
        {
            return string.Empty;
        }

        // IndexOf returns -1 when there is no dot (e.g. "localhost"); passing that
        // to Substring as a length would throw ArgumentOutOfRangeException.
        int firstDot = domain.IndexOf('.');
        return firstDot < 0 ? domain : domain.Substring(0, firstDot);
    }
}
Outputs:
Original : www.google.com
URL : http://www.google.com
Domain : google.com
Domain Part : google
Original : http://www.google.co.uk/path1/path2
URL : http://www.google.co.uk/path1/path2
Domain : google.co.uk
Domain Part : google
Original : http://google.co.uk/path1/path2
URL : http://google.co.uk/path1/path2
Domain : google.co.uk
Domain Part : google
Original : http://google.com
URL : http://google.com
Domain : google.com
Domain Part : google
Original : http://google.co.in
URL : http://google.co.in
Domain : google.co.in
Domain Part : google