I need to extract the text and images from a DOCX file into a text file (saving the images as separate graphic files, of course). Using the code below, how can I get each image and save it with a reference to it inside the text file?
If I use:
List<ImagePart> imgPart = wordProcessingDoc.MainDocumentPart.ImageParts.ToList();
I can get all the images, but sometimes the same image is used in several places, and I cannot work out which reference identifies a particular image in that list. Here is the sample code, taken from "Extract table from DOCX":
/// <summary>
/// Reads the main document part of a DOCX file and flattens it to plain text.
/// </summary>
/// <remarks>
/// Every WordprocessingML paragraph (<c>w:p</c>) produces one output line terminated by
/// <see cref="Environment.NewLine"/>. Within a paragraph, text runs (<c>w:t</c>) are copied
/// verbatim, tabs (<c>w:tab</c>) become <c>"\t"</c>, breaks (<c>w:br</c>) become <c>"\v"</c>,
/// and each drawing (<c>w:drawing</c>) is replaced by a fixed placeholder string.
/// </remarks>
/// <param name="fileInfo">The DOCX file to read. It is opened read-only.</param>
/// <returns>The flattened text of the document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileInfo"/> is <c>null</c>.</exception>
public static string ReadAllTextFromDocx(FileInfo fileInfo)
{
    if (fileInfo == null)
    {
        throw new ArgumentNullException("fileInfo");
    }

    const string WordprocessingNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    StringBuilder stringBuilder = new StringBuilder();
    using (WordprocessingDocument wordprocessingDocument = WordprocessingDocument.Open(fileInfo.FullName, false))
    {
        NameTable nameTable = new NameTable();
        XmlNamespaceManager xmlNamespaceManager = new XmlNamespaceManager(nameTable);
        xmlNamespaceManager.AddNamespace("w", WordprocessingNamespace);

        // Load straight from the part stream so the XML parser honours the encoding
        // declared in the document instead of going through an intermediate string.
        XmlDocument xmlDocument = new XmlDocument(nameTable);
        using (Stream partStream = wordprocessingDocument.MainDocumentPart.GetStream())
        {
            xmlDocument.Load(partStream);
        }

        XmlNodeList paragraphNodes = xmlDocument.SelectNodes("//w:p", xmlNamespaceManager);
        foreach (XmlNode paragraphNode in paragraphNodes)
        {
            // Note: ".//" also matches descendants of paragraphs nested inside this one,
            // so such content is emitted here and again when the nested paragraph is visited.
            XmlNodeList contentNodes = paragraphNode.SelectNodes(
                ".//w:t | .//w:tab | .//w:br | .//w:drawing",
                xmlNamespaceManager);

            foreach (XmlNode contentNode in contentNodes)
            {
                // The XPath above already restricts matches to the WordprocessingML namespace,
                // so dispatch on LocalName: the prefix used inside the file is arbitrary and
                // need not be "w".
                switch (contentNode.LocalName)
                {
                    case "t":
                        stringBuilder.Append(contentNode.InnerText);
                        break;
                    case "tab":
                        stringBuilder.Append("\t");
                        break;
                    case "br":
                        stringBuilder.Append("\v");
                        break;
                    case "drawing":
                        stringBuilder.Append("----------------IMAGE HERE-------------");
                        break;
                }
            }

            stringBuilder.Append(Environment.NewLine);
        }
    }

    return stringBuilder.ToString();
}