5

I have been trying the following C# code to extract images from a .doc file, but it is not working:

// Opens C:\doc.doc through Word interop and, for each inline shape, tries to copy a
// picture to the clipboard and save it as C:\test.bmp.
// NOTE(review): the snippet is truncated - the outer `if` opened below is never closed,
// and no call that quits Word is visible in this fragment.
object missing = System.Reflection.Missing.Value;            
            Microsoft.Office.Interop.Word.Application oWord = new Microsoft.Office.Interop.Word.Application();
            // This Document instance is discarded: oDoc is reassigned by Documents.Open below.
            Microsoft.Office.Interop.Word.Document oDoc = new Microsoft.Office.Interop.Word.Document();
            oWord.Visible = false;
            object str1 = "C:\\doc.doc";
            oDoc = oWord.Documents.Open(ref str1, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);

            if (oDoc.InlineShapes.Count > 0)            {


                // NOTE(review): the loop index j is never used in the body, so every
                // iteration performs the same selection and copy.
                for (int j = 0; j < oDoc.InlineShapes.Count; j++)
                {  

                    // Selects the whole active document rather than one inline shape.
                    oWord.ActiveDocument.Select();
                    oDoc.ActiveWindow.Selection.CopyAsPicture();

                    IDataObject data = Clipboard.GetDataObject();                    

                    // Only proceeds when the clipboard offers data for the Bitmap type.
                    if (data.GetDataPresent(typeof(System.Drawing.Bitmap)))
                    {
                        // NOTE(review): bm is assigned but never read afterwards.
                        object bm = data.GetData(DataFormats.Bitmap);

                        Bitmap bmp;
                        bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap));

                        // The path is the same on every iteration, so each save targets the same file.
                        bmp.Save("C:\\test.bmp");
                    }



                }

Can anybody provide working code for extracting the images from a Word file?

Chris
  • 8,527
  • 10
  • 34
  • 51
fawad
  • 1,323
  • 11
  • 31
  • 50
  • If you can manage `DOCX` files, they're simply zip files. You can open them up to find the binary file, and read the XML index to find out which one you want. – Origin Apr 17 '15 at 17:19

4 Answers4

10
using System;
using System.Drawing;
using System.IO;
using System.Threading;
using Page = System.Web.UI.Page;
using Microsoft.Office.Interop.Word;
using Microsoft.VisualBasic.Devices;
public partial class ReadIMG : System.Web.UI.Page
{   
    private Application m_word;
    private int m_i;
    /// <summary>
    /// Opens ~/LectureOrig/Word.docx read-only in Word and runs
    /// <c>CopyFromClipbordInlineShape</c> once for every inline shape in the document.
    /// Word is always shut down afterwards without saving.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        object missing = Type.Missing;
        object FileName = Server.MapPath("~/LectureOrig/Word.docx");
        object readOnly = true;
        m_word = new Application();
        try
        {
            // Open inside the try block: if the document cannot be opened, the finally
            // block still quits the Word instance created above instead of leaking it.
            m_word.Documents.Open(ref FileName,
                                    ref missing, ref readOnly, ref missing, ref missing,
                                    ref missing, ref missing, ref missing, ref missing,
                                    ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);

            // InlineShapes is indexed from 1, hence the 1..Count range.
            for (int i = 1; i <= m_word.ActiveDocument.InlineShapes.Count; i++)
            {
                // The worker reads the shape index from this field.
                m_i = i;

                // Each copy runs on its own thread marked STA; Join makes the
                // iterations strictly sequential so m_i is never changed mid-copy.
                Thread thread = new Thread(CopyFromClipbordInlineShape);
                thread.SetApartmentState(ApartmentState.STA);
                thread.Start();
                thread.Join();
            }
        }
        finally
        {
            // Quit without saving changes and drop the reference to the Word instance.
            object save = false;
            m_word.Quit(ref save, ref missing, ref missing);
            m_word = null;
        }
    }
    /// <summary>
    /// Copies inline shape number <c>m_i</c> of the active document to the clipboard and,
    /// when the clipboard offers bitmap data, saves it as GIF and JPEG under ~/ImagesGet.
    /// The shape index is part of the file name so that successive shapes do not
    /// overwrite each other. On failure a message is written to <c>LabelMessage</c>.
    /// </summary>
    protected void CopyFromClipbordInlineShape()
    {
        InlineShape inlineShape = m_word.ActiveDocument.InlineShapes[m_i];
        inlineShape.Select();
        m_word.Selection.Copy();

        Computer computer = new Computer();

        // Read the clipboard once so the null check and the later use see the same object.
        System.Windows.Forms.IDataObject data = computer.Clipboard.GetDataObject();
        if (data == null)
        {
            LabelMessage.Text="The Clipboard was empty";
            return;
        }

        if (!data.GetDataPresent(System.Windows.Forms.DataFormats.Bitmap))
        {
            LabelMessage.Text="The Data In Clipboard is not as image format";
            return;
        }

        // Dispose the image once both copies have been written.
        using (Image image = (Image)data.GetData(System.Windows.Forms.DataFormats.Bitmap, true))
        {
            // One pair of files per inline shape: image1.gif / image1.jpg, image2.gif / ...
            string baseName = string.Format("~/ImagesGet/image{0}", m_i);
            image.Save(Server.MapPath(baseName + ".gif"), System.Drawing.Imaging.ImageFormat.Gif);
            image.Save(Server.MapPath(baseName + ".jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);
        }
    }

Code copied from How To Exctract images from Doc (Word) file in C#?

Ekk
  • 5,627
  • 19
  • 27
6

Another option if it's a .docx file:

  1. Rename the file to a .zip
  2. Extract the contents
  3. Look for the following directory in the extracted folder word/media

Yeah, it's not the C# way to do it as posted, but even writing the code to perform the 3 steps above would be a way of automating the process if that's what you are looking for.

S McCrohan
  • 6,663
  • 1
  • 30
  • 39
Alex Davis
  • 61
  • 1
  • 1
  • The problem with this approach is, that you will only be able to get the original images. This might lead to some inaccurate outputs if the user modified (e.g. cropped) those within Word. – 91378246 Aug 22 '22 at 15:23
3

Here's a local/non-web-page version.

Most of this code is copied from: http://www.csharphelp.com/2007/05/save-picture-from-clipboard-to-file-using-c/ - plus a few lines from Ekk's answer.

// Copies inline shape number m_i of the active document to the clipboard and, when the
// clipboard offers bitmap data, saves it as BMP, JPEG and GIF in the current directory.
// The shape index is part of the file name so that successive shapes do not overwrite
// each other.
InlineShape inlineShape = m_word.ActiveDocument.InlineShapes[m_i];
inlineShape.Select();
m_word.Selection.Copy();

// Read the clipboard once so the null check and the later use see the same object.
IDataObject data = Clipboard.GetDataObject();
if (data == null)
{
    MessageBox.Show("The Clipboard was empty");
}
else if (!data.GetDataPresent(DataFormats.Bitmap))
{
    MessageBox.Show("The Data In Clipboard is not as image format");
}
else
{
    // Dispose the image once all three copies have been written.
    using (Image image = (Image)data.GetData(DataFormats.Bitmap, true))
    {
        // One set of files per inline shape: image1.bmp, image1.jpg, image1.gif, ...
        string baseName = "image" + m_i;

        image.Save(baseName + ".bmp", System.Drawing.Imaging.ImageFormat.Bmp);
        image.Save(baseName + ".jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
        image.Save(baseName + ".gif", System.Drawing.Imaging.ImageFormat.Gif);
    }
}
Chris
  • 3,400
  • 1
  • 27
  • 41
  • This won't work when Word is in the middle of opening a context menu for the image in question, by the way. The clipboard will not have any data after the copy. – Chris May 07 '16 at 20:36
-1

I had the same problem and solved it with the Spire library (link below). Add the library's DLL files as references in your Visual Studio project and use the code below:

enter code here



        // Lets the user pick a Word file, prints the text of every paragraph, and saves each
        // picture found in the paragraphs as a numbered PNG (1.png, 2.png, ...).
        if (file.ShowDialog() == DialogResult.OK) //if there is a file choosen by the user  
        {
            Spire.Doc.Document document = new Spire.Doc.Document(file.FileName);
            int index = 1;

            //Get Each Section of Document  
            foreach (Spire.Doc.Section section in document.Sections)
            {
                //Get Each Paragraph of Section  
                foreach (Spire.Doc.Documents.Paragraph paragraph in section.Paragraphs)
                {
                    // Print the paragraph text followed by an empty line.
                    Console.WriteLine(paragraph.Text);
                    Console.WriteLine();

                    //Get Each Document Object of Paragraph Items  
                    foreach (DocumentObject docObject in paragraph.ChildObjects)
                    {
                        //If Type of Document Object is Picture, Extract.  
                        if (docObject.DocumentObjectType != DocumentObjectType.Picture)
                        {
                            continue;
                        }

                        // Guard the cast: skip the object instead of dereferencing null.
                        DocPicture pic = docObject as DocPicture;
                        if (pic == null)
                        {
                            continue;
                        }

                        String imgName = String.Format(@"E:\C#\OnlineExam\Question\{0}.png", index);

                        //Save Image  
                        pic.Image.Save(imgName, System.Drawing.Imaging.ImageFormat.Png);
                        index++;
                    }
                }
            }
        }

You can find the DLL files at this link

Uzef Shaikh
  • 636
  • 6
  • 11