I am new to Apache PDFBox; below is my code to extract all the text from a simple resume. It's working fine, and now I want to get the text by font, bold style, images, etc. How do I do this?
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.util.PDFTextStripper;
public class PdfExtract {
public static void main(String args[]) throws IOException {
PDDocument pdf = PDDocument.load(new File("/home/praveen/Downloa/sampleresume.pdf"));
PDFTextStripper stripper = new PDFTextStripper();
String plainText = stripper.getText(pdf
System.out.println(plainText);
}
}