2

I would like to decompress some Deflate-encoded data (extracted from a PDF) in C#. Unfortunately, every time I try, I get the exception "Found invalid data while decoding.", even though the data is valid.

/// <summary>
/// Inflates S:\Temp\myFile.bin into S:\Temp\myFile1.txt, skipping the first
/// two bytes of the input before handing it to <see cref="DeflateStream"/>.
/// </summary>
private void Decompress()
{
    // using blocks guarantee both streams are closed even when decoding throws
    // (e.g. InvalidDataException), so the input file is never left locked.
    using (FileStream fs = new FileStream(@"S:\Temp\myFile.bin", FileMode.Open))
    {
        //First two bytes are irrelevant
        // (presumably the two-byte zlib/RFC 1950 header; DeflateStream expects raw RFC 1951 data)
        fs.ReadByte();
        fs.ReadByte();

        using (DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Decompress))
        {
            // FileMode.Create truncates an existing output file; OpenOrCreate would
            // leave stale trailing bytes behind if the previous file was longer.
            StreamToFile(d_Stream, @"S:\Temp\myFile1.txt", FileMode.Create);
        }
    }
}

/// <summary>
/// Copies everything that can be read from <paramref name="inputStream"/> into
/// the file at <paramref name="outputFile"/>, opened for writing with the given
/// <paramref name="fileMode"/>.
/// </summary>
/// <param name="inputStream">Stream to read from until Read returns 0.</param>
/// <param name="outputFile">Path of the file to write.</param>
/// <param name="fileMode">Mode used to open the output file.</param>
/// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="outputFile"/> is null or empty.</exception>
private static void StreamToFile(Stream inputStream, string outputFile, FileMode fileMode)
{
    if (inputStream == null)
    {
        throw new ArgumentNullException("inputStream");
    }

    if (String.IsNullOrEmpty(outputFile))
    {
        throw new ArgumentException("Argument null or empty.", "outputFile");
    }

    const int ChunkSize = 4096;
    byte[] chunk = new byte[ChunkSize];

    using (FileStream target = new FileStream(outputFile, fileMode, FileAccess.Write))
    {
        // Pump fixed-size chunks until the source reports end of stream (0 bytes read).
        int bytesRead = inputStream.Read(chunk, 0, ChunkSize);
        while (bytesRead != 0)
        {
            target.Write(chunk, 0, bytesRead);
            bytesRead = inputStream.Read(chunk, 0, ChunkSize);
        }
    }
}

Does anyone have any ideas? Thanks.

Igby Largeman
  • 16,495
  • 3
  • 60
  • 86
user1011394
  • 1,656
  • 6
  • 28
  • 41
  • Why are the first two bytes irrelevant? –  Feb 08 '12 at 16:51
  • 2
    The stream decoder is using RFC 1951. In combination with DeflateStream the first two bytes are irrelevant. If the stream decoder were using RFC 1950, then I would have to use the first bytes too. – user1011394 Feb 08 '12 at 16:59
  • 2
    What have you done to prove that the error is incorrect and that the data is in fact valid? – Ben Robinson Feb 08 '12 at 17:02

4 Answers4

3

I added this for test data:-

// Generates test data: writes the byte values 0..254 through a DeflateStream
// into C:\Temp\myFile.bin.
private static void Compress()
{
  // using blocks flush and close both streams even if a write throws,
  // so the file is never left open or half-flushed.
  using (FileStream fs = new FileStream(@"C:\Temp\myFile.bin", FileMode.Create))
  using (DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Compress))
  {
    for (byte n = 0; n < 255; n++)
      d_Stream.WriteByte(n);
  }
}

Modified Decompress like this:-

// Inflates C:\Temp\myFile.bin into C:\Temp\myFile1.txt without skipping any
// leading bytes of the input.
private static void Decompress()
{
  // using blocks close both streams even if decoding throws.
  using (FileStream fs = new FileStream(@"C:\Temp\myFile.bin", FileMode.Open))
  {
    //First two bytes are irrelevant
    //      fs.ReadByte();
    //      fs.ReadByte();
    // (skip deliberately disabled for this test: the input is read from its first byte)

    using (DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Decompress))
    {
      // FileMode.Create truncates an existing output file; OpenOrCreate would
      // leave stale trailing bytes behind if the previous file was longer.
      StreamToFile(d_Stream, @"C:\Temp\myFile1.txt", FileMode.Create);
    }
  }
}

Ran it like this:-

// Entry point for the experiment: calls Compress() and then Decompress(), in that order.
static void Main(string[] args)
{
  Compress();
  Decompress();
}

And got no errors.

I conclude that either the first two bytes are relevant (Obviously they are with my particular test data.) or that your data has a problem.

Can we have some of your test data to play with?

(Obviously don't if it's sensitive)

2
/// <summary>
/// Inflates a buffer whose DEFLATE payload is preceded by a two-byte prefix
/// and returns the inflated bytes decoded with <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="input">Two prefix bytes followed by raw DEFLATE data.</param>
/// <returns>The decompressed content as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="input"/> is shorter than the two-byte prefix.</exception>
private static string decompress(byte[] input)
{
    if (input == null)
        throw new ArgumentNullException("input");

    const int HeaderLength = 2;
    if (input.Length < HeaderLength)
        throw new ArgumentException("Input must contain at least the two header bytes.", "input");

    // Wrap the payload in place (offset past the prefix) instead of copying it
    // into a second array; the streams are disposed deterministically.
    using (var stream = new MemoryStream())
    using (var compressStream = new MemoryStream(input, HeaderLength, input.Length - HeaderLength, false))
    using (var decompressor = new DeflateStream(compressStream, CompressionMode.Decompress))
    {
        decompressor.CopyTo(stream);
        return Encoding.Default.GetString(stream.ToArray());
    }
}

Thank you user159335 and user1011394 for putting me on the right track! Just pass all the bytes of the stream as the input of the function above. Make sure the byte count matches the length specified.

Pete
  • 1,191
  • 12
  • 19
2

All you need to do is use GZip instead of Deflate. Below is the code I use for the content of the stream… endstream section in a PDF document:

        using System.IO.Compression;

        /// <summary>
        /// Skips any leading CR/LF bytes in <paramref name="data"/> and runs the
        /// remainder through a <see cref="GZipStream"/> in decompress mode.
        /// Failures are reported via a message box rather than propagated.
        /// </summary>
        /// <param name="data">Raw bytes taken from between "stream" and "endstream".</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public void DecompressStreamData(byte[] data)
        {
            if (data == null)
                throw new ArgumentNullException("data");

            // Skip leading CR/LF. The bounds check stops the scan running off the
            // end of the buffer when it consists solely of line breaks.
            int start = 0;
            while (start < data.Length && (data[start] == 0x0a || data[start] == 0x0d))
                start++;

            // Read the payload in place from the parameter only; the original mixed
            // this.data and data, which breaks if the two ever differ in length.
            using (MemoryStream msInput = new MemoryStream(data, start, data.Length - start, false))
            using (MemoryStream msOutput = new MemoryStream())
            {
                try
                {
                    using (GZipStream decomp = new GZipStream(msInput, CompressionMode.Decompress))
                    {
                        decomp.CopyTo(msOutput);
                    }
                    // NOTE(review): the decompressed bytes in msOutput are not returned or
                    // stored anywhere; callers currently only learn whether decoding failed.
                }
                catch (Exception e)
                {
                    MessageBox.Show(e.Message);
                }
            }
        }
0

None of the solutions worked for me on Deflate attachments in a PDF/A-3 document. Some research showed that .NET DeflateStream does not support compressed streams with a header and trailer as per RFC1950.

Error message for reference: The archive entry was compressed using an unsupported compression method.

The solution is to use an alternative library SharpZipLib

Here is a simple method that successfully decoded a Deflate attachment from a PDF/A-3 file for me:

/// <summary>
/// Reads <paramref name="data"/> through an InflaterInputStream and returns
/// the resulting bytes decoded with <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="data">The compressed bytes.</param>
/// <returns>The decompressed content as a string.</returns>
public static string SZLDecompress(byte[] data) {
    using (var source = new MemoryStream(data))
    using (var inflater = new InflaterInputStream(source)) {
        var inflated = new MemoryStream();
        inflater.CopyTo(inflated);
        inflated.Position = 0;
        byte[] plainBytes = inflated.ToArray();
        return Encoding.Default.GetString(plainBytes);
    }
}
Programmierus
  • 167
  • 11