In the example below, `HTMLDocument::getElement(String id)` finds the `Element` whose `HTML.Attribute.id` attribute has the value `"unique_id"`. The `Element` is `BranchElement(div) 1,6`. I'm not sure where your `Element` iteration goes awry, but you can see the `unique_id` value in the `BranchElement(div)` in the console output below. Because an `HTMLDocument` models HTML, the enclosed `HTMLReader` may synthesize `HTML.Tag.CONTENT`, such as the content in the implied paragraphs seen below.

Console:
BranchElement(div) 1,6
Element: 'BranchElement(html) 0,6', name: 'html', children: 2, attributes: 1, leaf: false
Attribute: 'name', Value: 'html'
Element: 'BranchElement(head) 0,1', name: 'head', children: 1, attributes: 1, leaf: false
Attribute: 'name', Value: 'head'
Element: 'BranchElement(p-implied) 0,1', name: 'p-implied', children: 1, attributes: 1, leaf: false
Attribute: 'name', Value: 'p-implied'
Element: 'LeafElement(content) 0,1', name: 'content', children: 0, attributes: 2, leaf: true
Attribute: 'CR', Value: 'true'
Attribute: 'name', Value: 'content'
Content (0-1): ''
Element: 'BranchElement(body) 1,6', name: 'body', children: 1, attributes: 1, leaf: false
Attribute: 'name', Value: 'body'
Element: 'BranchElement(div) 1,6', name: 'div', children: 1, attributes: 3, leaf: false
Attribute: 'align', Value: 'center'
Attribute: 'id', Value: 'unique_id'
Attribute: 'name', Value: 'div'
Element: 'BranchElement(p-implied) 1,6', name: 'p-implied', children: 2, attributes: 1, leaf: false
Attribute: 'name', Value: 'p-implied'
Element: 'LeafElement(content) 1,5', name: 'content', children: 0, attributes: 1, leaf: true
Attribute: 'name', Value: 'content'
Content (1-5): 'Test'
Element: 'LeafElement(content) 5,6', name: 'content', children: 0, attributes: 2, leaf: true
Attribute: 'CR', Value: 'true'
Attribute: 'name', Value: 'content'
Content (5-6): ''
Code:
import java.awt.EventQueue;
import java.util.Enumeration;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
/**
 * Demonstrates looking up an element of an {@link HTMLDocument} by its
 * {@code id} attribute and dumping every element of the document, with its
 * attributes and text content, to standard output.
 *
 * @see <a href="http://stackoverflow.com/a/5614370/230513">Stack Overflow answer 5614370</a>
 */
public class Test {

    /** Markup loaded into the editor pane; the {@code div} carries the id that is looked up. */
    private static final String TEXT
        = "<html>"
        + "<head></head>"
        + "<body>"
        + "<div align=center id=unique_id>Test</div>"
        + "</body>"
        + "</html>";

    /**
     * Queues {@link #display()} on the AWT event queue so that the Swing
     * components are created and shown from the event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        EventQueue.invokeLater(new Test()::display);
    }

    /**
     * Shows {@link #TEXT} in a read-only editor pane, prints the element whose
     * id is {@code unique_id}, then prints every element of the document.
     */
    private void display() {
        JFrame f = new JFrame("Test");
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JEditorPane jep = new JEditorPane("text/html", TEXT);
        jep.setEditable(false);
        f.add(jep);
        f.pack();
        f.setLocationRelativeTo(null);
        f.setVisible(true);

        HTMLDocument htmlDoc = (HTMLDocument) jep.getDocument();
        System.out.println(htmlDoc.getElement("unique_id"));

        ElementIterator iterator = new ElementIterator(htmlDoc);
        Element element;
        while ((element = iterator.next()) != null) {
            try {
                printElement(htmlDoc, element);
            } catch (BadLocationException e) {
                // Report the failing element and keep going with the rest of the tree.
                e.printStackTrace(System.err);
            }
        }
    }

    /**
     * Prints a one-line summary of {@code element}, followed by one line per
     * attribute. For an element whose name attribute is
     * {@link HTML.Tag#CONTENT}, the trimmed document text it spans is printed
     * directly after the name attribute line.
     *
     * @param htmlDoc the document the element belongs to; used to read the text
     * @param element the element to describe
     * @throws BadLocationException if the element's offsets are not a valid
     *         range in {@code htmlDoc}
     */
    private void printElement(HTMLDocument htmlDoc, Element element) throws BadLocationException {
        AttributeSet attrSet = element.getAttributes();
        System.out.println(""
            + "Element: '" + element.toString().trim()
            + "', name: '" + element.getName()
            + "', children: " + element.getElementCount()
            + ", attributes: " + attrSet.getAttributeCount()
            + ", leaf: " + element.isLeaf());

        // The element's tag does not change while its attributes are listed,
        // so look it up once instead of once per attribute.
        Object tag = attrSet.getAttribute(StyleConstants.NameAttribute);

        Enumeration<?> attrNames = attrSet.getAttributeNames();
        while (attrNames.hasMoreElements()) {
            Object attr = attrNames.nextElement();
            System.out.println(" Attribute: '" + attr + "', Value: '"
                + attrSet.getAttribute(attr) + "'");
            // Emit the text right after the name attribute so it appears at
            // most once per element, and only for content leaves.
            if (attr == StyleConstants.NameAttribute
                && tag == HTML.Tag.CONTENT) {
                int startOffset = element.getStartOffset();
                int endOffset = element.getEndOffset();
                int length = endOffset - startOffset;
                // %n keeps the line terminator consistent with println above.
                System.out.printf(" Content (%d-%d): '%s'%n", startOffset,
                    endOffset, htmlDoc.getText(startOffset, length).trim());
            }
        }
    }
}