How can I get the title of a web page for a given URL using an HTML parser? Is it possible to get the title using regular expressions? I would prefer to use an HTML parser.
I am working in the Java Eclipse IDE.
I have tried using the following code, but was unsuccessful.
Any ideas?
Thanks in advance!
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.tags.TitleTag;
/**
 * Prints the title of a web page using the HTML Parser library (org.htmlparser).
 *
 * <p>The page is parsed into a tree of nodes, and the tree is searched
 * depth-first for the first {@link TitleTag}. Looking only at the first
 * top-level node is not enough: on a typical page that node is the doctype or
 * the {@code <html>} tag, while {@code <title>} is nested inside
 * {@code <html>/<head>}.
 */
public class TestHtml {

    /** Address of the page whose title is printed. */
    private static final String PAGE_URL = "http://www.yahoo.com/";

    /**
     * Fetches and parses {@link #PAGE_URL}, then prints the page title to
     * standard output. If the page contains no title tag, a message is written
     * to standard error instead. Parser failures are reported via their stack
     * trace.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        Parser parser = new Parser();
        try {
            parser.setResource(PAGE_URL);
            // A null filter returns the top-level nodes of the whole page.
            NodeList roots = parser.parse(null);
            TitleTag title = findTitle(roots);
            if (title != null) {
                // getTitle() returns the text enclosed by the tag; getText()
                // would return the tag's own text rather than the page title.
                System.out.println(title.getTitle());
            } else {
                System.err.println("No <title> tag found at " + PAGE_URL);
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the given nodes and all of their descendants, depth-first, for
     * the first title tag.
     *
     * @param nodes nodes to search; may be {@code null} (e.g. a node without children)
     * @return the first {@link TitleTag} encountered, or {@code null} if there is none
     */
    private static TitleTag findTitle(NodeList nodes) {
        if (nodes == null) {
            return null;
        }
        for (int i = 0; i < nodes.size(); i++) {
            Node node = nodes.elementAt(i);
            if (node instanceof TitleTag) {
                return (TitleTag) node;
            }
            TitleTag nested = findTitle(node.getChildren());
            if (nested != null) {
                return nested;
            }
        }
        return null;
    }
}