0

I have the following code to remove the Last Author and Revision Number properties of a Word document:

using Microsoft.Office.Core;
using Word = Microsoft.Office.Interop.Word;
using System.Reflection;
using System.IO;
...


// Clears the built-in document properties named in g_lstCheck for every Word
// file listed in lstDocFile, driving a hidden Word instance through interop.
Word.Application oWord;
Word._Document oDoc;

oWord = new Word.Application();
oWord.Visible = false;

List<string> lstDocFile = new List<string>();
//Add doc files here
List<string> g_lstCheck = new List<string>();
//Add list check here "Last Author" and "Revision Number"

try
{
    foreach (string path in lstDocFile)
    {
        oDoc = oWord.Documents.Open(path, ReadOnly: false);
        foreach (string chkItem in g_lstCheck)
        {
            // NOTE(review): strValue is not declared in this snippet; it is
            // assumed to be a string declared in the elided code above.
            strValue = oDoc.BuiltInDocumentProperties[chkItem].Value;
            if (!string.IsNullOrEmpty(strValue))
            {
                // Overwrite only properties that currently hold a value.
                oDoc.BuiltInDocumentProperties[chkItem].Value = string.Empty;
            }
        }
        oDoc.Close(Word.WdSaveOptions.wdSaveChanges);
    }
}
finally
{
    // Always shut the hidden Word instance down, even when opening or editing
    // a document throws; otherwise the invisible process keeps running.
    oWord.Quit(Word.WdSaveOptions.wdDoNotSaveChanges);
}

After the code runs, I expect the Last Author and Revision Number to be empty strings. Instead, the Last Author becomes me and the Revision Number increases by 1. I understand that this happens because I use the following code to save the Word document:

oDoc.Close(Word.WdSaveOptions.wdSaveChanges);

Please help me remove the Last Author and Revision Number with C#.

Uwe Keim
  • 39,551
  • 56
  • 175
  • 291
123iamking
  • 2,387
  • 4
  • 36
  • 56

2 Answers

1

For .docx (Open XML) files, the simplest way is to use the official Open XML SDK NuGet package. With it, manipulating document properties is very easy:

// open for read write
// Open the .docx in editable mode (second argument: true).
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open("myfile.docx", true))
{
    // Blank out the author-related core properties.
    var coreProperties = wordDoc.PackageProperties;
    coreProperties.Creator = null;
    coreProperties.LastModifiedBy = null;
    coreProperties.Revision = null;
}

For .doc (Word 97–2003) files, here is a small C# method that removes the properties (which are technically stored in a completely different way):

// Example call: strip author, revision number and last author from a .doc file.
RemoveProperties(
    "myfile.doc",
    SummaryInformationFormatId,
    PIDSI_AUTHOR,
    PIDSI_REVNUMBER,
    PIDSI_LASTAUTHOR);

...

/// <summary>
/// Deletes the given property ids from one property set of a structured
/// storage (compound) file such as a legacy .doc document.
/// </summary>
/// <param name="filePath">Path of the file to modify.</param>
/// <param name="propertySet">Format id (FMTID) of the property set to edit.</param>
/// <param name="ids">Property ids to delete; nothing happens when null or empty.</param>
/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
/// <exception cref="Win32Exception">
/// Opening the file or the property set failed. A missing property set
/// (STG_E_FILENOTFOUND) is not an error: the method simply returns.
/// </exception>
public static void RemoveProperties(string filePath, Guid propertySet, params int[] ids)
{
    if (filePath == null)
        throw new ArgumentNullException(nameof(filePath));

    if (ids == null || ids.Length == 0)
        return;

    int hr = StgOpenStorageEx(filePath, STGM.STGM_DIRECT_SWMR | STGM.STGM_READWRITE | STGM.STGM_SHARE_DENY_WRITE, STGFMT.STGFMT_ANY, 0, IntPtr.Zero, IntPtr.Zero, typeof(IPropertySetStorage).GUID, out IPropertySetStorage setStorage);
    if (hr != 0)
        throw new Win32Exception(hr);

    // Declared outside the try so the finally block can release it.
    IPropertyStorage storage = null;
    try
    {
        hr = setStorage.Open(propertySet, STGM.STGM_READWRITE | STGM.STGM_SHARE_EXCLUSIVE, out storage);
        if (hr != 0)
        {
            // The file has no such property set, so there is nothing to remove.
            const int STG_E_FILENOTFOUND = unchecked((int)0x80030002);
            if (hr == STG_E_FILENOTFOUND)
                return;

            throw new Win32Exception(hr);
        }

        // One PROPSPEC per id, each addressing the property by numeric id.
        var props = new PROPSPEC[ids.Length];
        for (int i = 0; i < ids.Length; i++)
        {
            props[i].ulKind = PRSPEC.PRSPEC_PROPID;
            props[i].union.propid = ids[i];
        }

        storage.DeleteMultiple(props.Length, props);
        storage.Commit(0);
    }
    finally
    {
        // The property storage was opened with exclusive sharing; release it
        // deterministically instead of waiting for the garbage collector,
        // then release the parent property-set storage.
        if (storage != null)
            Marshal.ReleaseComObject(storage);

        Marshal.ReleaseComObject(setStorage);
    }
}

// "The Summary Information Property Set"
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa380376.aspx
// Format id of the summary-information property set, passed as the
// propertySet argument of RemoveProperties.
public static readonly Guid SummaryInformationFormatId = new Guid("F29F85E0-4FF9-1068-AB91-08002B27B3D9");
// Property ids inside that property set (author, last author, revision number).
public const int PIDSI_AUTHOR = 4;
public const int PIDSI_LASTAUTHOR = 8;
public const int PIDSI_REVNUMBER = 9;

// Storage access-mode flags passed to StgOpenStorageEx and
// IPropertySetStorage.Open. The numeric values mirror the native STGM_*
// constants and must not be changed.
[Flags]
private enum STGM
{
    STGM_READ = 0x00000000,
    STGM_READWRITE = 0x00000002,
    STGM_SHARE_DENY_NONE = 0x00000040,
    STGM_SHARE_DENY_WRITE = 0x00000020,
    STGM_SHARE_EXCLUSIVE = 0x00000010,
    STGM_DIRECT_SWMR = 0x00400000
}

// Storage format selector passed to StgOpenStorageEx. The numeric values
// mirror the native STGFMT_* constants and must not be changed.
private enum STGFMT
{
    STGFMT_STORAGE = 0,
    STGFMT_FILE = 3,
    STGFMT_ANY = 4,
    STGFMT_DOCFILE = 5
}

// Managed mirror of the native PROPSPEC structure: a discriminator (ulKind)
// followed by a union that holds either a property id or a name pointer.
// Field order is part of the interop contract.
[StructLayout(LayoutKind.Sequential)]
private struct PROPSPEC
{
    public PRSPEC ulKind;
    public PROPSPECunion union;
}

// Union part of PROPSPEC: both fields share offset 0, so only the one
// selected by PROPSPEC.ulKind is meaningful.
[StructLayout(LayoutKind.Explicit)]
private struct PROPSPECunion
{
    // Numeric property id (used with PRSPEC_PROPID).
    [FieldOffset(0)]
    public int propid;
    // Pointer to a property name string (used with PRSPEC_LPWSTR).
    [FieldOffset(0)]
    public IntPtr lpwstr;
}

// Discriminator for PROPSPEC.union: tells which union member is in use.
private enum PRSPEC
{
    PRSPEC_LPWSTR = 0,
    PRSPEC_PROPID = 1
}

// P/Invoke declaration for ole32!StgOpenStorageEx. The HRESULT is returned as
// an int (0 means success as checked by RemoveProperties); the requested
// interface is returned through ppObjectOpen.
[DllImport("ole32.dll")]
private static extern int StgOpenStorageEx([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, STGM grfMode, STGFMT stgfmt, int grfAttrs, IntPtr pStgOptions, IntPtr reserved2, [MarshalAs(UnmanagedType.LPStruct)] Guid riid, out IPropertySetStorage ppObjectOpen);

// Partial COM import of IPropertySetStorage. Member order defines the vtable
// layout, so the placeholder must stay in place and in this order.
[Guid("0000013A-0000-0000-C000-000000000046"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
private interface IPropertySetStorage
{
    // Placeholder for the first vtable slot (Create in the native interface); never called.
    void Unused1();
    // Opens an existing property set; PreserveSig makes the raw HRESULT the return value.
    [PreserveSig]
    int Open([MarshalAs(UnmanagedType.LPStruct)] Guid rfmtid, STGM grfMode, out IPropertyStorage storage);
}

// Partial COM import of IPropertyStorage. Member order defines the vtable
// layout: the UnusedN placeholders only reserve slots so that DeleteMultiple
// and Commit land on the correct native methods. Do not reorder or remove them.
[Guid("00000138-0000-0000-C000-000000000046"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
private interface IPropertyStorage
{
    // Slots 1-2: ReadMultiple / WriteMultiple in the native interface (not used here).
    void Unused1();
    void Unused2();
    // Deletes the cpspec properties described by rgpspec.
    void DeleteMultiple(int cpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPSPEC[] rgpspec);
    // Slots 4-6: ReadPropertyNames / WritePropertyNames / DeletePropertyNames (not used here).
    void Unused4();
    void Unused5();
    void Unused6();
    // Commits pending changes; RemoveProperties passes 0 as the flags.
    void Commit(uint grfCommitFlags);
    // rest omitted
}
Simon Mourier
  • 132,049
  • 21
  • 248
  • 298
  • Thank you very much. I see that the Dsofile.dll EULA says: "The user assumes the entire risk", "USE... AT YOUR OWN RISK", etc., so I'm a little worried about using Dsofile.dll. So I want to ask: is the Open XML SDK NuGet package safer than Dsofile.dll? – 123iamking Aug 08 '17 at 07:50
  • @123iamking - yes, it's an official open source Microsoft package: https://github.com/OfficeDev/Open-XML-SDK – Simon Mourier Aug 08 '17 at 07:54
  • A small note: you have to add a reference to WindowsBase.dll to fix the build error: https://stackoverflow.com/a/7814593/4608491 – 123iamking Aug 08 '17 at 09:19
  • Is it possible to edit a .doc file with it? According to this answer: https://stackoverflow.com/a/4220382/4608491 , it only works for .docx – 123iamking Aug 08 '17 at 09:40
  • 1
    No, Open XML is only for the Open XML file format. Old Office files (.doc, .xls, etc.) are "compound files". I have provided sample C# code to remove properties from these files. – Simon Mourier Aug 08 '17 at 10:08
0

*In this article, the author, Mr. Vivek Singh, gives us some useful code.

**We also have this library, Dsofile.dll, from Microsoft.

Proceed as follows.

Step 1: Download the Dsofile.dll library (**), extract it, and get the file Interop.Dsofile.dll (retrieved 8/8/2017).

Step 2: Add a reference to the file Interop.Dsofile.dll in your C# project.

Step 3: Use this code (adapted from article * — thanks to Vivek Singh; I just removed the word Class from OleDocumentPropertiesClass to prevent a build error, and edited it a bit to solve this problem):

        string fileName = "";//Add the full path of the Word file

        // Open the document's OLE properties through Dsofile.
        // NOTE(review): the second argument (false) is presumably the
        // read-only flag, i.e. the file is opened for writing - confirm
        // against the Dsofile documentation.
        OleDocumentProperties myDSOOleDocument = new OleDocumentProperties();
        myDSOOleDocument.Open(fileName, false,
            DSOFile.dsoFileOpenOptions.dsoOptionOpenReadOnlyIfNoWriteAccess);
        try
        {
            myDSOOleDocument.SummaryProperties.LastSavedBy = string.Empty;
            //myDSOOleDocument.SummaryProperties.RevisionNumber = string.Empty; //This can't be edited - it is read-only

            myDSOOleDocument.Save();
        }
        finally
        {
            // Close the document even when setting the property or saving
            // throws, so the file is not left open.
            myDSOOleDocument.Close();
        }

Anyway, I can't edit RevisionNumber because it's read-only. OK, I can only be happy with what I can get.

123iamking
  • 2,387
  • 4
  • 36
  • 56