I'm working on a PDF reader, but I want to distinguish between a real new line and a line break inside a paragraph (one that is only there because the line ran out of space).
The problem is that even when the next line belongs to the same paragraph, the extractor adds a \n.
Here is some code I have already tried:
/// <summary>
/// Extracts the text of a single page of the PDF located at <c>Path</c>.
/// </summary>
/// <param name="page">Number of the page to read, starting at 1; defaults to the first page.</param>
/// <returns>The page text as produced by <c>LocationTextExtractionStrategy</c>.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="page"/> is less than 1 or greater than the number of pages in the document.
/// </exception>
public string GetContent(int page = 1)
{
    using (var pdfReader = new PdfReader(Path))
    {
        // Reject an invalid page up front instead of letting the extractor fail
        // with a less descriptive error.
        if (page < 1 || page > pdfReader.NumberOfPages)
        {
            throw new System.ArgumentOutOfRangeException(
                "page",
                page,
                "Page number must be between 1 and " + pdfReader.NumberOfPages + ".");
        }

        // Alternatives that were tried here: SimpleTextExtractionStrategy, and a
        // FilteredTextRenderListener wrapping LocationTextExtractionStrategy with a
        // RegionTextRenderFilter over Rectangle(0, 0, 612, 792).
        ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

        // Use the requested page (previously hard-coded to 1). The result is already a
        // .NET string, so it is returned as-is: converting it to Encoding.Default bytes
        // and back cannot add information and may replace characters that the default
        // code page cannot represent.
        return PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
    }
}