I have a few questions related to bookmarks and links.
Bookmarks
As we get the bookmarks, is there any option to find the bookmark in the page content, modify the text of the bookmark, and modify the bookmark present in the tree structure of the PDF file?
Do we have an option to search for a bookmark in the page content, irrespective of differences such as spacing, spelling, etc.?
Is there an option to edit a bookmark and the bookmark text present in the PDF page content?
Is there any option to find H1, H2 headings in the page content?
Is there any option to find whether a bookmark is pointing to the correct page or not?
Links
- Is there any option to find links, and to check whether a link is pointing to the correct URL?
This is my code:
/// <summary>
/// Walks every bookmark (outline entry) of the PDF at <paramref name="fileName"/>, at any
/// nesting depth, and adds a row to the result table for each bookmark whose title text
/// occurs in the text of the page that the bookmark points to.
/// </summary>
/// <remarks>
/// Results accumulate across calls: the table is loaded from ViewState["Append"], stored
/// back there, and bound to GVResult. The PDF is only opened when the file exists.
/// </remarks>
/// <param name="fileName">Path of the PDF file to inspect.</param>
/// <param name="CompareText">Not used by this method; kept so existing callers still compile.</param>
public void ReadPdfFile(string fileName, string CompareText)
{
    // "as" yields null when nothing (or something of another type) is stored; start with a fresh table then.
    System.Data.DataTable dtResult = (ViewState["Append"] as System.Data.DataTable) ?? new System.Data.DataTable();

    if (!dtResult.Columns.Contains("BookMarks"))
    {
        dtResult.Columns.Add(new DataColumn("BookMarks"));
    }
    if (!dtResult.Columns.Contains("Exists"))
    {
        dtResult.Columns.Add(new DataColumn("Exists"));
    }

    if (File.Exists(fileName))
    {
        PdfReader pdfReader = new PdfReader(fileName);
        try
        {
            // May be null (e.g. a document without an outline); the helper treats null as "nothing to do".
            IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);

            // Several bookmarks often target the same page; extract each page's text only once.
            Dictionary<int, string> pageTextCache = new Dictionary<int, string>();

            AddBookmarksFoundOnTargetPage(pdfReader, bookmarks, pageTextCache, dtResult);
            dtResult.AcceptChanges();
        }
        finally
        {
            // Release the reader even when parsing or text extraction throws.
            pdfReader.Close();
        }
    }

    ViewState["Append"] = dtResult;
    GVResult.DataSource = dtResult;
    GVResult.DataBind();
}

/// <summary>
/// Visits <paramref name="bookmarks"/> and, recursively, their "Kids" in document order
/// (parent before children) and adds a result row for every bookmark whose "Title" text
/// is contained in the extracted text of its "Page".
/// </summary>
/// <param name="pdfReader">Open reader used for page count and text extraction.</param>
/// <param name="bookmarks">Bookmark entries of one outline level; null is treated as empty.</param>
/// <param name="pageTextCache">Extracted page text keyed by 1-based page number, shared across the whole traversal.</param>
/// <param name="dtResult">Table with "BookMarks" and "Exists" columns that receives the rows.</param>
private void AddBookmarksFoundOnTargetPage(
    PdfReader pdfReader,
    IList<Dictionary<string, object>> bookmarks,
    IDictionary<int, string> pageTextCache,
    System.Data.DataTable dtResult)
{
    if (bookmarks == null)
    {
        return;
    }

    foreach (Dictionary<string, object> bookmark in bookmarks)
    {
        if (bookmark == null)
        {
            continue;
        }

        // Look entries up by key: positional access into Values depends on which optional
        // keys happen to be present and in what order they were inserted.
        object titleValue;
        object pageValue;
        if (bookmark.TryGetValue("Title", out titleValue) && titleValue != null
            && bookmark.TryGetValue("Page", out pageValue) && pageValue != null)
        {
            string title = titleValue.ToString();

            // The "Page" value is space-separated; its first token is the page number.
            string[] pageParts = pageValue.ToString().Split(' ');
            int pageNumber;
            bool hasValidPage =
                int.TryParse(
                    pageParts[0],
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out pageNumber)
                && pageNumber >= 1
                && pageNumber <= pdfReader.NumberOfPages;

            if (hasValidPage)
            {
                string pageText;
                if (!pageTextCache.TryGetValue(pageNumber, out pageText))
                {
                    // The text is compared exactly as returned by the extractor. Round-tripping it
                    // through Encoding.Default would replace characters outside the system code
                    // page and make titles containing such characters impossible to match.
                    pageText = PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber);
                    pageTextCache[pageNumber] = pageText;
                }

                if (pageText != null && pageText.Contains(title))
                {
                    DataRow row = dtResult.NewRow();
                    row["BookMarks"] = title;
                    // NOTE(review): "No" is written when the title WAS found on the page, which looks
                    // inverted for a column named "Exists". Kept as-is because it is user-visible
                    // output - confirm the intended value before changing it.
                    row["Exists"] = "No";
                    dtResult.Rows.Add(row);
                }
            }
        }

        // Descend only when a "Kids" entry is actually present; the number of keys in the
        // dictionary says nothing reliable about whether children exist.
        object kidsValue;
        if (bookmark.TryGetValue("Kids", out kidsValue))
        {
            AddBookmarksFoundOnTargetPage(
                pdfReader,
                kidsValue as IList<Dictionary<string, object>>,
                pageTextCache,
                dtResult);
        }
    }
}