First use XPath
to extract only the href value, then apply your replaceAll to that value
to get the result you want.
And you don't have to download any additional frameworks or libraries for this to work.
Here's a quick demo class on how this works:
package com.example.test;
import java.io.StringReader;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.xml.sax.InputSource;
/**
 * Demo: pull the {@code href} attribute out of a small XML snippet with XPath,
 * then strip everything except digits and dots from it.
 *
 * <p>Only classes shipped with the JDK ({@code javax.xml.xpath}, {@code org.xml.sax})
 * are used.
 */
public class Test {

    /** XPath expression selecting {@code href} attributes anywhere in the document. */
    private static final String HREF_XPATH = "//@href";

    /**
     * Runs the demo on a hard-coded XML snippet and prints the extracted number
     * ({@code 123} for the sample input) to standard output.
     *
     * <p>If the XPath evaluation fails, the failure is reported on standard error and
     * nothing is printed to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String xml = "<tag blabla=\"title\"><a href=\"/test/tt123\"> TEST 1 </a></tag>";
        try {
            System.out.println(extractHrefNumber(xml));
        } catch (XPathExpressionException e) {
            // Stop here: there is no href value to post-process, and continuing
            // would only replace the real error with a NullPointerException.
            System.err.println("Could not evaluate " + HREF_XPATH + " against the XML: " + e);
        }
    }

    /**
     * Evaluates {@link #HREF_XPATH} against {@code xml} and reduces the resulting
     * string to its digits and dots.
     *
     * @param xml the XML document text to search
     * @return the XPath string result with every character other than a digit or
     *     {@code '.'} removed
     * @throws XPathExpressionException if the expression cannot be evaluated against
     *     {@code xml}
     */
    private static String extractHrefNumber(String xml) throws XPathExpressionException {
        XPath xPath = XPathFactory.newInstance().newXPath();
        InputSource source = new InputSource(new StringReader(xml));
        // The String-returning overload makes the cast from Object unnecessary.
        String hrefValue = xPath.evaluate(HREF_XPATH, source);
        // Same pattern as before: drop everything that is not a digit or a dot.
        return hrefValue.replaceAll("[^\\d.]", "");
    }
}