1

How can I get the root domain name, without the extension, from a given URL in JavaScript?

Example:

rootDomain("https://www.etsy.com/market/tumblr_clothing") -> etsy

rootDomain("https://petitions.whitehouse.gov/") -> whitehouse

rootDomain("facebook.com") -> facebook

rootDomain("google.ca") -> google

I was able to find two incomplete solutions: "How to get domain name only using javascript?" and "Extract hostname name from string".

The main problem is that I'm having trouble creating a solution without the extension or subdomain.

What would be the best way to tackle this?

Community
  • 1
  • 1
Rohit Tigga
  • 2,373
  • 9
  • 43
  • 81
  • @AndrewLi I've tried http://stackoverflow.com/questions/8253136/how-to-get-domain-name-only-using-javascript ;however, my example cases are failing. I guess I have to get better with regex. – Rohit Tigga Jan 12 '17 at 03:22
  • @AndrewLi Is regex able to do this? I believe you can't do this with a regular expression because you don't know how many blocks are in the suffix. – Rohit Tigga Jan 15 '17 at 00:29

2 Answers

3

Try:

/**
 * Extracts the hostname from a URL-like string.
 *
 * Accepts absolute URLs ("https://host/path"), protocol-relative URLs
 * ("//host/path") and scheme-less strings ("host/path"). The scheme,
 * user-info, port, path, query string and fragment are all removed.
 *
 * @param {string} url - URL or URL-like string to inspect.
 * @returns {string} The hostname, or "" when the string has no host part.
 */
function extractHostname(url) {
    // Drop a leading "scheme://" or protocol-relative "//". Anchoring at the
    // start means a "//" that appears later (e.g. inside a query value) is
    // not mistaken for the scheme separator.
    var withoutScheme = url.replace(/^(?:[a-z][a-z0-9+.\-]*:)?\/\//i, '');

    // The authority ends at the first path, query or fragment delimiter.
    var authority = withoutScheme.split(/[\/?#]/)[0];

    // Discard "user:password@" credentials if present.
    var hostAndPort = authority.substring(authority.lastIndexOf('@') + 1);

    // Discard ":port".
    return hostAndPort.split(':')[0];
}

/**
 * Returns the registrable name of a URL's host without subdomains or the
 * extension, e.g. "https://www.etsy.com/market" -> "etsy".
 *
 * This is a heuristic, not a Public Suffix List lookup. A two-label suffix
 * is assumed when the last label has two letters and the label before it is
 * either two letters long (".me.uk", ".co.uk") or one of a few common
 * registry second levels (".com.au", ".org.uk", ...). Hosts whose own name
 * matches that pattern (e.g. "www.hp.cn") are therefore still misread.
 *
 * @param {string} url - URL or URL-like string; passed to extractHostname.
 * @returns {string} The root domain label; the bare hostname when it has a
 *     single label; "" when the hostname has no labels.
 */
function extractRootDomainNoExt(url) {
    // Generic second-level labels that registries place under two-letter
    // country codes. Two-letter ones ("co", "ac", "me") are already covered
    // by the length rule below.
    var commonSecondLevels = ['com', 'org', 'net', 'gov', 'edu', 'mil'];

    var hostname = extractHostname(url);

    // Ignore empty labels so a fully-qualified "example.com." (trailing dot)
    // is treated the same as "example.com".
    var labels = hostname.split('.').filter(function (label) {
        return label !== '';
    });
    var count = labels.length;

    if (count === 0) {
        return '';
    }
    if (count === 1) {
        return labels[0];
    }

    var topLevel = labels[count - 1];
    var secondLevel = labels[count - 2];

    // Country Code Top Level Domain (ccTLD) with a registry second level,
    // e.g. ".me.uk" or ".com.au": the name sits one label further left.
    var hasCompoundSuffix = count > 2 &&
        topLevel.length === 2 &&
        (secondLevel.length === 2 ||
            commonSecondLevels.indexOf(secondLevel.toLowerCase()) !== -1);

    return hasCompoundSuffix ? labels[count - 3] : secondLevel;
}

//run extractRootDomainNoExt over a range of URL shapes and print each result
console.log("== Testing extractRootDomainNoExt: ==");
[
    "https://www.etsy.com/market/tumblr_clothing",
    "https://petitions.whitehouse.gov/",
    "facebook.com",
    "google.ca",
    "http://www.blog.classroom.me.uk/index.php",
    "http://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "https://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "www.youtube.com/watch?v=ClkQA2Lb_iE",
    "ftps://ftp.websitename.com/dir/file.txt",
    "doesnothaveext/dir/file.txt",
    "websitename.com:1234/dir/file.txt",
    "ftps://websitename.com:1234/dir/file.txt",
    "example.com?param=value",
    "https://facebook.github.io/jest/",
    "//youtube.com/watch?v=ClkQA2Lb_iE"
].forEach(function (sampleUrl) {
    console.log(extractRootDomainNoExt(sampleUrl));
});

*Hit "Run code snippet" to see this work in a variety of use cases.

This also strips subdomains and handles country-code domain names, e.g. classroom.co.uk -> classroom.

See also: Extract hostname name from string

Lewis Nakao
  • 7,079
  • 2
  • 26
  • 21
0
/**
 * Returns the root domain of a URL without subdomains or the extension,
 * e.g. "https://petitions.whitehouse.gov/" -> "whitehouse".
 *
 * Only the host part is examined, so dots in the path or query string and
 * the URL scheme do not affect the result. The extension is assumed to be
 * the single last label; multi-label suffixes such as ".co.uk" are not
 * recognised.
 *
 * @param {string} url - URL or URL-like string (scheme optional).
 * @returns {string} The label immediately left of the extension; the host
 *     itself when it contains no dot; "" when there is no host.
 */
function rootDomain(url) {
    // Remove a leading "scheme://" or protocol-relative "//".
    var withoutScheme = url.replace(/^(?:[a-z][a-z0-9+.\-]*:)?\/\//i, '');

    // Keep only the authority: everything before the path, query or fragment.
    var authority = withoutScheme.split(/[\/?#]/)[0];

    // Strip "user:password@" credentials and the ":port" suffix.
    var host = authority.substring(authority.lastIndexOf('@') + 1).split(':')[0];

    // Skip empty labels so a trailing dot ("example.com.") is harmless.
    var labels = host.split('.').filter(function (label) {
        return label !== '';
    });

    if (labels.length === 0) {
        return '';
    }
    if (labels.length === 1) {
        // No extension to strip (e.g. "localhost").
        return labels[0];
    }
    return labels[labels.length - 2];
}

//print rootDomain's result for each sample URL
[
    "facebook.com",
    "https://www.etsy.com/market/tumblr_clothing",
    "https://petitions.whitehouse.gov/",
    "google.ca"
].forEach(function (sampleUrl) {
    console.log(rootDomain(sampleUrl));
});

I ended up iterating over the string and stripping the unwanted parts with JavaScript's substring methods.

Rohit Tigga
  • 2,373
  • 9
  • 43
  • 81