You can parse out the domain name with plain string operations such as `indexOf` and `substring` (a regex would also work).
Example snippet:
// Minimal illustration: pull the label between the first and second dot out of a URL string.
String a = "http://www.google.com";
String tempString = a.substring(a.indexOf(".")+1, a.length()); // drops everything up to and including the first dot -> "google.com"
// NOTE: this assumes a second dot exists. If tempString contains no dot (e.g. the input was
// "http://google.com"), indexOf returns -1 and substring(0, -1) throws StringIndexOutOfBoundsException.
String domainString = tempString.substring(0, tempString.indexOf(".")); // keeps everything before the next dot -> "google"
System.out.println(domainString);
Outputs google
EDIT :
Here's a sample stand-alone demo that can deal with more complex domain structures and extract individual components.
It currently tests the inputs listed below; to try other domains, add more `parser.extract(...)` calls to the `main` method in the source.
http://www.google.com/
ftp://www.google.com
http://google.com/
google.com
localhost:80
Here's the source (Pardon my lazy spaghetti):
package domain.parser.test;
/**
 * Splits URL-like strings into protocol, subdomain, domain, top-level domain and port and
 * prints each component to standard output.
 *
 * <p>The host is split purely by position, counted from the right: the last label is the
 * top-level domain, the one before it the domain, and everything further left the subdomain.
 * No public-suffix list is consulted, so multi-part suffixes such as {@code co.uk} are not
 * recognised as a single top-level domain.
 */
public class Parseromatic {

    /** Characters that end the authority ({@code [user@]host[:port]}) part of a URL. */
    private static final String AUTHORITY_END = "/?#";

    /** Placeholder printed for a component that is absent or empty. */
    private static final String MISSING = "N/A";

    public static void main(String[] args) {
        Parseromatic parser = new Parseromatic();
        parser.extract("http://www.google.com/");
        parser.extract("ftp://www.google.com");
        parser.extract("http://google.com/");
        parser.extract("google.com");
        parser.extract("localhost:80");
    }

    /**
     * Parses {@code a} and prints five lines ({@code Protocol}, {@code Subdomain},
     * {@code Domain}, {@code Top-Domain}, {@code Port}) followed by an empty line.
     * Components that are absent or empty are printed as {@code N/A}.
     *
     * @param a a URL or bare {@code host[:port]} string; must not be {@code null}
     */
    public void extract(String a) {
        // A "://" that only shows up after a path/query/fragment delimiter belongs to that
        // part (e.g. "example.com/?next=http://x"), so it is not treated as a scheme.
        int schemeIdx = a.indexOf("://");
        if (schemeIdx > firstIndexOfAny(a, AUTHORITY_END)) {
            schemeIdx = -1;
        }

        String scheme = null;
        String authority = a;
        if (schemeIdx >= 0) {
            scheme = a.substring(0, schemeIdx);
            authority = a.substring(schemeIdx + 3);
        }

        // Cut the path, query and fragment BEFORE looking for dots, so that something like
        // "/index.html" can never be mistaken for part of the host name.
        authority = authority.substring(0, firstIndexOfAny(authority, AUTHORITY_END));

        // Drop optional "user:password@" credentials.
        int at = authority.lastIndexOf('@');
        if (at >= 0) {
            authority = authority.substring(at + 1);
        }

        // The port is whatever follows the last ':'. A ':' located before a ']' is part of a
        // bracketed IPv6 literal, not a port separator.
        String host = authority;
        String port = null;
        int colon = authority.lastIndexOf(':');
        if (colon > authority.lastIndexOf(']')) {
            host = authority.substring(0, colon);
            port = authority.substring(colon + 1);
        }

        // Tolerate the fully-qualified form with a trailing dot ("example.com.").
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }

        String subdomain = null;
        String domain = host;
        String topDomain = null;
        int lastDot = host.lastIndexOf('.');
        if (lastDot >= 0 && !isAddressLiteral(host, lastDot)) {
            topDomain = host.substring(lastDot + 1);
            String left = host.substring(0, lastDot);
            int prevDot = left.lastIndexOf('.');
            domain = left.substring(prevDot + 1);
            if (prevDot >= 0) {
                // May itself contain dots for deeper hosts ("mail.corp" in mail.corp.google.com).
                subdomain = left.substring(0, prevDot);
            }
        }

        printField("Protocol", scheme);
        printField("Subdomain", subdomain);
        printField("Domain", domain);
        printField("Top-Domain", topDomain);
        printField("Port", port);
        System.out.println(); // Cosmetic empty line between results
    }

    /**
     * Returns the part of {@code a} before its first {@code '/'}.
     *
     * @param a the string to trim
     * @return everything before the first slash, or {@code a} unchanged when it has no slash
     */
    public String snipOffPages(String a) {
        return a.substring(0, firstIndexOfAny(a, "/"));
    }

    /**
     * Prints the protocol of {@code a} and reports whether one was found. Kept as a public
     * helper; {@link #extract(String)} does its own, stricter scheme detection.
     *
     * @param a the string to inspect
     * @return {@code true} if {@code a} contains {@code "://"}; the text before its first
     *     occurrence is then printed as the protocol, otherwise {@code N/A} is printed
     */
    public boolean protocol(String a) {
        int schemeIdx = a.indexOf("://");
        if (schemeIdx >= 0) { // Check for existence of protocol declaration
            System.out.println("Protocol: " + a.substring(0, schemeIdx));
            return true;
        }
        System.out.println("Protocol: N/A");
        return false;
    }

    /** Prints {@code label: value}, substituting {@code N/A} for a null or empty value. */
    private static void printField(String label, String value) {
        boolean missing = value == null || value.isEmpty();
        System.out.println(label + ": " + (missing ? MISSING : value));
    }

    /**
     * Returns the index of the first character of {@code text} that occurs in
     * {@code delimiters}, or {@code text.length()} when none does.
     */
    private static int firstIndexOfAny(String text, String delimiters) {
        for (int i = 0; i < text.length(); i++) {
            if (delimiters.indexOf(text.charAt(i)) >= 0) {
                return i;
            }
        }
        return text.length();
    }

    /**
     * Heuristic: a host is treated as an address literal rather than a domain name when it is
     * bracketed (IPv6 form) or when its last dot-separated label consists only of digits
     * (as in {@code 192.168.1.1}). Such hosts are reported as a single "Domain".
     */
    private static boolean isAddressLiteral(String host, int lastDot) {
        if (host.startsWith("[")) {
            return true;
        }
        String lastLabel = host.substring(lastDot + 1);
        if (lastLabel.isEmpty()) {
            return false;
        }
        for (int i = 0; i < lastLabel.length(); i++) {
            if (!Character.isDigit(lastLabel.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
And for the specified domains above it outputs:
Protocol: http
Subdomain: www
Domain: google
Top-Domain: com
Port: N/A
Protocol: ftp
Subdomain: www
Domain: google
Top-Domain: com
Port: N/A
Protocol: http
Subdomain: N/A
Domain: google
Top-Domain: com
Port: N/A
Protocol: N/A
Subdomain: N/A
Domain: google
Top-Domain: com
Port: N/A
Protocol: N/A
Subdomain: N/A
Domain: localhost
Top-Domain: N/A
Port: 80