As advised in comments, reconsider using regex directly on HTML/XML documents as these are not regular languages. Instead, use regex on parsed text/value content but not to transform documents.
One great tool for manipulating XML is XSLT, the transformation language and sibling to XPath. Java ships with a built-in XSLT 1.0 processor and can also use external processors (Xalan, Saxon, etc.). Consider the following setup:
XSLT Script (save as .xsl file used below; script removes empty nodes)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output version="1.0" encoding="UTF-8" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Suppress any element whose string value is empty. An element that has
       only attributes (no text anywhere below it) also has an empty string
       value, so it is dropped as well. -->
  <xsl:template match="*[string(.) = '']"/>

  <!-- Identity rule: everything not suppressed above is copied unchanged,
       recursing into attributes and child nodes. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Java Code
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Command-line utility that applies an XSLT stylesheet to an XML file and
 * writes the indented result to an output file.
 *
 * <p>Usage: {@code java XMLTransform [inputXml [xslFile [outputXml]]]}. Any
 * argument that is not supplied falls back to the corresponding default path
 * declared below.
 */
public class XMLTransform {

    /** Input XML path used when no first argument is given. */
    private static final String DEFAULT_INPUT_XML = "path/to/Input.xml";

    /** Stylesheet path used when no second argument is given. */
    private static final String DEFAULT_XSL_FILE = "path/to/XSLT/Script.xsl";

    /** Output XML path used when no third argument is given. */
    private static final String DEFAULT_OUTPUT_XML = "path/to/Output.xml";

    /**
     * Resolves the three file paths and runs the transformation.
     *
     * @param args optional paths: {@code [0]} input XML, {@code [1]} XSLT
     *             stylesheet, {@code [2]} output XML
     * @throws IOException                  if the input document cannot be read
     * @throws URISyntaxException           never thrown by this implementation;
     *                                      kept so the declared signature is unchanged
     * @throws SAXException                 if the input document is not well-formed
     * @throws ParserConfigurationException if no DOM parser can be configured
     * @throws TransformerException         if the stylesheet cannot be compiled or
     *                                      the transformation fails
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
            SAXException, ParserConfigurationException,
            TransformerException {
        String inputXML = argOrDefault(args, 0, DEFAULT_INPUT_XML);
        String xslFile = argOrDefault(args, 1, DEFAULT_XSL_FILE);
        String outputXML = argOrDefault(args, 2, DEFAULT_OUTPUT_XML);

        transform(new File(inputXML), new File(xslFile), new File(outputXML));
    }

    /** Returns {@code args[index]} when it exists, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Parses {@code inputXml} into a DOM, applies {@code xslFile} and writes {@code outputXml}. */
    private static void transform(File inputXml, File xslFile, File outputXml)
            throws IOException, SAXException, ParserConfigurationException,
            TransformerException {
        // Load XML and XSL Document
        Source xslt = new StreamSource(xslFile);

        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        // XSLT operates on namespace-qualified names, but the factory default is
        // NOT namespace-aware; without this, prefixed elements in the DOM carry
        // no namespace URI and namespace-based template matches misbehave.
        docFactory.setNamespaceAware(true);
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(inputXml);

        // XSLT Transformation with pretty print
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer(xslt);
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        // Xalan-namespaced extension property for the indent width.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        transformer.transform(new DOMSource(doc), new StreamResult(outputXml));
    }
}
Output
<ct>
<c>http://192.168.105.213</c>
<l>http://192.168.105.213</l>
<l>http://192.168.105.213</l>
<o>http://192.168.105.213</o>
</ct>
NAMESPACES
When working with namespaces, as in the XML below:
<prefix:ct xmlns:prefix="http://www.example.com">
<c>http://192.168.105.213</c>
<l>http://192.168.105.213</l>
<o></o>
<l>http://192.168.105.213</l>
<o>http://192.168.105.213</o>
</prefix:ct>
Use the following XSLT, which declares the namespace prefix in its header and adds a template for the root element:
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
               xmlns:prefix="http://www.example.com">
  <xsl:output version="1.0" encoding="UTF-8" indent="yes" />
  <xsl:strip-space elements="*"/>

  <!-- Identity Transform: copy every attribute and node as is -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Retain Namespace Prefix: rebuild the root element under the declared
       prefix and namespace URI, keeping its namespace declarations.
       An unprefixed name test such as "ct" only selects elements in NO
       namespace, so the namespace-qualified form "prefix:ct" is listed as
       well; the bare "ct" alternative is kept for input in which the root
       is presented without a namespace. -->
  <xsl:template match="prefix:ct | ct">
    <xsl:element name="prefix:{local-name()}" namespace="http://www.example.com">
      <xsl:copy-of select="namespace::*"/>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:element>
  </xsl:template>

  <!-- Remove Empty Nodes: elements whose string value is empty -->
  <xsl:template match="*[.='']"/>
</xsl:transform>
Output
<prefix:ct xmlns:prefix="http://www.example.com">
<c>http://192.168.105.213</c>
<l>http://192.168.105.213</l>
<l>http://192.168.105.213</l>
<o>http://192.168.105.213</o>
</prefix:ct>