I am using PDFBox to extract the outlines (bookmarks) from PDF files. It works for some PDFs, but for others no outlines are returned.
Every one of these PDFs does have outlines: when I open one in a PDF reader, I can click an outline entry and jump to the corresponding page.
Here is my code:
/**
 * Loads the PDF at {@code filePath} and prints its outline (bookmark) tree
 * via {@link #getOutlines(PDDocument, PDOutlineNode, String)}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // try-with-resources closes the document even when loading or the
    // outline traversal throws; the original only closed it on success.
    try (PDDocument document = PDDocument.load(new File(filePath))) {
        PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline();
        if (outline == null) {
            // The catalog has no outline entry; there is nothing to walk, and
            // dereferencing it would throw a NullPointerException.
            System.err.println("Document has no outline: " + filePath);
            return;
        }
        getOutlines(document, outline, "");
    } catch (InvalidPasswordException e) {
        // The document is encrypted and could not be opened without a password.
        e.printStackTrace();
    } catch (IOException e) {
        // Reading or parsing the file failed.
        e.printStackTrace();
    }
}
/**
 * Prints every outline item below {@code bookmark}, depth-first, one per line
 * in the form {@code <indentation><title>--------><page>}.
 *
 * <p>{@code <page>} is the 1-based position of the item's destination page in
 * the document's page tree. When the item does not resolve to a page of this
 * document, the placeholder {@code (no page destination)} is printed instead.
 *
 * @param document    the document the outline belongs to; used to resolve
 *                    destinations and to look up page positions
 * @param bookmark    the outline node whose children are printed; a
 *                    {@code null} node is treated as having no children
 * @param indentation prefix written before each title at this level; child
 *                    levels are printed with four additional spaces
 * @throws IOException if resolving an item's destination fails
 */
public static void getOutlines(PDDocument document, PDOutlineNode bookmark, String indentation) throws IOException {
    if (bookmark == null) {
        // Nothing to traverse (e.g. the document has no outline at all).
        return;
    }
    PDOutlineItem current = bookmark.getFirstChild();
    while (current != null) {
        String pageLabel = "(no page destination)";
        // findDestinationPage can yield null when the item has no destination
        // or its target is not a page (see the PDFBox API docs), so the result
        // must not be handed to indexOf unchecked.
        PDPage currentPage = current.findDestinationPage(document);
        if (currentPage != null) {
            int pageIndex = document.getDocumentCatalog().getPages().indexOf(currentPage);
            if (pageIndex >= 0) {
                pageLabel = String.valueOf(pageIndex + 1);
            }
        }
        System.out.println(indentation + current.getTitle() + "-------->" + pageLabel);
        // Recurse into this item's children one indentation level deeper.
        getOutlines(document, current, indentation + "    ");
        current = current.getNextSibling();
    }
}