55

I am using the following code to compress and decompress string data, but the problem which I am facing is, it is easily getting compressed without error, but the decompress method throws the following error.

Exception in thread "main" java.io.IOException: Not in GZIP format

public static void main(String[] args) throws Exception {
    String string = "I am what I am hhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
            + "bjggujhhhhhhhhh"
            + "rggggggggggggggggggggggggg"
            + "esfffffffffffffffffffffffffffffff"
            + "esffffffffffffffffffffffffffffffff"
            + "esfekfgy enter code here`etd`enter code here wdd"
            + "heljwidgutwdbwdq8d"
            + "skdfgysrdsdnjsvfyekbdsgcu"
            + "jbujsbjvugsduddbdj";

    System.out.println("after compress:");
    String compressed = compress(string);
    System.out.println(compressed);
    System.out.println("after decompress:");
    String decomp = decompress(compressed);
    System.out.println(decomp);
}


public static String compress(String str) throws Exception {
    if (str == null || str.length() == 0) {
        return str;
    }
    System.out.println("String length : " + str.length());
    ByteArrayOutputStream obj=new ByteArrayOutputStream();
    GZIPOutputStream gzip = new GZIPOutputStream(obj);
    gzip.write(str.getBytes("UTF-8"));
    gzip.close();
    String outStr = obj.toString("UTF-8");
    System.out.println("Output String length : " + outStr.length());
    return outStr;
}

public static String decompress(String str) throws Exception {
    if (str == null || str.length() == 0) {
        return str;
    }
    System.out.println("Input String length : " + str.length());
    GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(str.getBytes("UTF-8")));
    BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8"));
    String outStr = "";
    String line;
    while ((line=bf.readLine())!=null) {
        outStr += line;
    }
    System.out.println("Output String lenght : " + outStr.length());
    return outStr;
}

Still couldn't figure out how to fix this issue!

Dennis Rönn
  • 101
  • 10
rampuriyaaa
  • 4,926
  • 10
  • 34
  • 41

8 Answers8

50

This is because of

String outStr = obj.toString("UTF-8");

Send the byte[] which you can get from your ByteArrayOutputStream and use it as such in your ByteArrayInputStream to construct your GZIPInputStream. Following are the changes which need to be done in your code.

byte[] compressed = compress(string); //In the main method

public static byte[] compress(String str) throws Exception {
    ...
    ...
    return obj.toByteArray();
}

public static String decompress(byte[] bytes) throws Exception {
    ...
    GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(bytes));
    ...
}
Rahul
  • 44,383
  • 11
  • 84
  • 103
  • 10
    Also consider using StringBuilder instead of just concatenating String, because String is immutable and you will be wasting space in the stringpool. (Rivers of ink spilled about this) – Tomas Fornara Mar 13 '14 at 22:38
  • 2
    @fornarat That comment makes little sense. There is no "space in the stringpool" to consider for a normal String object, except for strings literals (i.e. things that are specified directly in the .java file, like String text = "text", which are put in the constant string pool) *and* string you specifically put in that pool yourself by running the .intern() method on. Otherwise a String is just an object like everything else. – stolsvik Nov 30 '18 at 20:49
37

The above Answer solves our problem but in addition to that. if we are trying to decompress a uncompressed("not a zip format") byte[] . we will get "Not in GZIP format" exception message.

For solving that we can add addition code in our Class.

public static boolean isCompressed(final byte[] compressed) {
    return (compressed[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (compressed[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
}

My Complete Compression Class with compress/decompress would look like:

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class GZIPCompression {
  public static byte[] compress(final String str) throws IOException {
    if ((str == null) || (str.length() == 0)) {
      return null;
    }
    ByteArrayOutputStream obj = new ByteArrayOutputStream();
    GZIPOutputStream gzip = new GZIPOutputStream(obj);
    gzip.write(str.getBytes("UTF-8"));
    gzip.flush();
    gzip.close();
    return obj.toByteArray();
  }

  public static String decompress(final byte[] compressed) throws IOException {
    final StringBuilder outStr = new StringBuilder();
    if ((compressed == null) || (compressed.length == 0)) {
      return "";
    }
    if (isCompressed(compressed)) {
      final GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed));
      final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(gis, "UTF-8"));

      String line;
      while ((line = bufferedReader.readLine()) != null) {
        outStr.append(line);
      }
    } else {
      outStr.append(compressed);
    }
    return outStr.toString();
  }

  public static boolean isCompressed(final byte[] compressed) {
    return (compressed[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (compressed[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
  }
}
Arun Pratap Singh
  • 3,428
  • 30
  • 23
  • very nice. you're missing a call to `gzip.flush();` before `gzip.close();` though. – isapir Jun 18 '17 at 23:59
  • I use your code and i add a little correction for break lines, in your decompress method : 'while ((line = bufferedReader.readLine()) != null) { outStr.append(line); outStr.append(System.getProperty("line.separator")); }' – Guillaume Camus Nov 15 '17 at 09:41
  • 1
    in decompress, change your while loop to: char[] buffer = new char[256]; int readCount; while((readCount = bufferedReader.read(buffer)) > 0){ outStr.append(buffer, 0, readCount); } it wont get confused by line breaks – Nick May 03 '21 at 03:40
16

If you ever need to transfer the zipped content via network or store it as text, you have to use Base64 encoder(such as apache commons codec Base64) to convert the byte array to a Base64 String, and decode the string back to byte array at remote client. Found an example at Use Zip Stream and Base64 Encoder to Compress Large String Data!

jeffery.yuan
  • 1,177
  • 1
  • 17
  • 27
JeffersonZhang
  • 229
  • 2
  • 5
  • Most straightforward example I've come across yet for gzipping. – Dan Coghlan Nov 09 '16 at 15:10
  • But if we used gzip compression to compress for example a big json into a gzip, then we need to send it via network, so we convert this byte array that is the result from the gzip compression to base64, will it still be less size than the original uncompressed big json? – BugsOverflow Apr 15 '23 at 05:55
12

Another example of correct compression and decompression:

@Slf4j
public class GZIPCompression {
    public static byte[] compress(final String stringToCompress) {
        if (isNull(stringToCompress) || stringToCompress.length() == 0) {
            return null;
        }

        try (final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final GZIPOutputStream gzipOutput = new GZIPOutputStream(baos)) {
            gzipOutput.write(stringToCompress.getBytes(UTF_8));
            gzipOutput.finish();
            return baos.toByteArray();
        } catch (IOException e) {
            throw new UncheckedIOException("Error while compression!", e);
        }
    }

    public static String decompress(final byte[] compressed) {
        if (isNull(compressed) || compressed.length == 0) {
            return null;
        }

        try (final GZIPInputStream gzipInput = new GZIPInputStream(new ByteArrayInputStream(compressed));
             final StringWriter stringWriter = new StringWriter()) {
            IOUtils.copy(gzipInput, stringWriter, UTF_8);
            return stringWriter.toString();
        } catch (IOException e) {
            throw new UncheckedIOException("Error while decompression!", e);
        }
    }
}
Sergey Frolov
  • 1,317
  • 1
  • 16
  • 30
8

The problem is this line:

    String outStr = obj.toString("UTF-8");

The byte array obj contains arbitrary binary data. You can't "decode" arbitrary binary data as if it was UTF-8. If you try you will get a String that cannot then be "encoded" back to bytes. Or at least, the bytes you get will be different to what you started with ... to the extent that they are no longer a valid GZIP stream.

The fix is to store or transmit the contents of the byte array as-is. Don't try to convert it into a String. It is binary data, not text.

Stephen C
  • 698,415
  • 94
  • 811
  • 1,216
5

Client send some messages need be compressed, server (kafka) decompress the string meesage

Below is my sample:

compress:

   public static String compress(String str, String inEncoding) {
        if (str == null || str.length() == 0) {
            return str;
        }
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            GZIPOutputStream gzip = new GZIPOutputStream(out);
            gzip.write(str.getBytes(inEncoding));
            gzip.close();
            return URLEncoder.encode(out.toString("ISO-8859-1"), "UTF-8");
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

decompress:

    public static String decompress(String str, String outEncoding) {
        if (str == null || str.length() == 0) {
            return str;
        }

        try {
            String decode = URLDecoder.decode(str, "UTF-8");

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            ByteArrayInputStream in = new ByteArrayInputStream(decode.getBytes("ISO-8859-1"));
            GZIPInputStream gunzip = new GZIPInputStream(in);
            byte[] buffer = new byte[256];
            int n;
            while ((n = gunzip.read(buffer)) >= 0) {
                out.write(buffer, 0, n);
            }
            return out.toString(outEncoding);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
Yu Zhang
  • 1,202
  • 1
  • 11
  • 16
1

You can't convert binary data to String. As a solution you can encode binary data and then convert to String. For example, look at this How do you convert binary data to Strings and back in Java?

  • If you need string of compressed value (say to store in a DB), DEFINITELY take this route – 0TTT0 May 12 '19 at 20:04
-1

In decompress method, we should decode the bytes using base64 decoder. By doing so we can overcome this exception

byte[] bytes = str.getBytes("UTF-8");
bytes = Base64.deocdeBase64(bytes);

GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(bytes));

By adding and modifying the above lines in decompress method, we can fix this issue

Belal R
  • 29
  • 4
  • (s)he didn't encode base64 on compression, and if (s)he do, the size of output will be larger which is not expected in this case – Mostafa Nazari Mar 13 '23 at 10:03