While extracting text from a PDF file to a .txt file using the iText and PDFBox jars, I am unable to extract some of the special characters. Below is my code:
/**
 * Extracts the text content of a PDF with PDFBox and saves it as a UTF-8 text file.
 *
 * <p>The extracted text is written through a character writer with an explicit
 * UTF-8 encoding, so non-ASCII characters returned by the text stripper are not
 * replaced by the platform default charset on their way to disk.
 */
public class PDFConversionUsingPDFBox {

    /** PDF that is read when no input path is passed on the command line. */
    private static final String DEFAULT_INPUT = "C:/Users/sample/Desktop/test.PDF";

    /** Text file that is written when no output path is passed on the command line. */
    private static final String DEFAULT_OUTPUT = "C:/Users/sample/Desktop/testfile.txt";

    /**
     * Parses the input PDF, prints its text to standard output and writes the
     * same text to the output file encoded as UTF-8.
     *
     * <p>Any failure is reported with a stack trace; all streams and documents
     * are released whether or not the extraction succeeded.
     *
     * @param args optional arguments: {@code args[0]} is the PDF to read and
     *             {@code args[1]} is the text file to write; each falls back to
     *             its built-in default when absent
     */
    public static void main(String args[]) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        FileInputStream in = null;
        COSDocument cosDoc = null;
        PDDocument pdDoc = null;
        java.io.Writer out = null;
        try {
            in = new FileInputStream(inputPath);
            PDFParser parser = new PDFParser(in);
            parser.parse();
            cosDoc = parser.getDocument();
            pdDoc = new PDDocument(cosDoc);

            PDFTextStripper pdfStripper = new PDFTextStripper();
            String parsedText = pdfStripper.getText(pdDoc);

            // Console output uses the console's own encoding, so special
            // characters may still look wrong here even when the file is right.
            System.out.println(parsedText);

            // Write the extracted text itself (not the PDF structure) and pin
            // the encoding so it does not depend on the platform default.
            out = new java.io.OutputStreamWriter(new FileOutputStream(outputPath), "UTF-8");
            out.write(parsedText);
            // Flush inside the try so a failed write is reported by the catch below.
            out.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeDocument(pdDoc, cosDoc);
            closeAndReport(out);
            closeAndReport(in);
        }
    }

    /**
     * Closes the parsed document, reporting (not propagating) any failure.
     *
     * @param pdDoc  the high-level document, or {@code null} if it was never created
     * @param cosDoc the low-level document, or {@code null} if parsing never got that far
     */
    private static void closeDocument(PDDocument pdDoc, COSDocument cosDoc) {
        try {
            if (pdDoc != null) {
                // PDDocument.close() is documented to close the underlying
                // COSDocument as well, so one call covers both objects.
                pdDoc.close();
            } else if (cosDoc != null) {
                cosDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes a stream or writer if it was opened, reporting any failure.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }
}