Data Compression and Decompression in Java

Categories Java
0
Tags

Getting started

Compressing and Decompressing Data Using Java APIs/

Non-UTF-8 encoded file names in ZIP files

ZipInputStream.getNextEntry() throws IllegalArgumentException when the ZIP file contains entry names that are not UTF-8 encoded, for example Chinese or Japanese file names stored in a legacy encoding such as GBK or Shift_JIS.

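The issue is solved in JDK 7, which adds ZipInputStream and ZipFile constructors that accept the Charset used to decode entry names: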
https://blogs.oracle.com/xuemingshen/entry/non_utf_8_encoding_in

JDK 5/6 users can use Apache Commons Compress instead, which lets you specify the encoding of the entry names:
http://commons.apache.org/compress/

RAR?

RAR archive tools

Open RAR Archive and List Contents

Code Samples

package com.tangcs.zhc.server.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.junit.Test;

import com.tangcs.zhc.server.ServerTestBase;
import com.tangcs.zhc.server.ServerUtils;

  15. /**
  16.  *
  17.  * @author Warren Tang
  18.  */
  19. public class CompressionTest extends ServerTestBase {
  20.  
  21.   /**
  22.    * http://java.sun.com/developer/technicalArticles/Programming/compression/
  23.    */
  24.   @Test public void testDecompressZipFile() throws IOException {
  25.  
  26.     ZipInputStream in = new ZipInputStream(getResourceAsStream("xls/exam-results.zip"));
  27.  
  28.     String fileName;
  29.     ZipEntry entry;
  30.     while((entry = in.getNextEntry()) != null ) {
  31.       fileName = entry.getName();
  32.       OutputStream out = getTempFileAsStream(fileName);
  33.       ServerUtils.copyStream(in, out, false);
  34.       out.close();
  35.     }
  36.     in.close();
  37.  
  38.   }
  39.  
  40.   /**
  41.    * <p>IllegalArgumentException from ZipInputStream.getNextEntry()
  42.    * when zip file contains Non-UTF-8 (such as Chinese or Japanese) encoded characters.</p>
  43.    * <p>This is solved in JDK7:
  44.    * https://blogs.oracle.com/xuemingshen/entry/non_utf_8_encoding_in </p>
  45.    */
  46.   @Test(expected = IllegalArgumentException.class)
  47.   public void testZipInputStreamDoesnotSupportZipFileContainingChineseNamedFiles()
  48.       throws IOException {
  49.  
  50.     ZipInputStream in = new ZipInputStream(getResourceAsStream("xls/sample.zip"));
  51.  
  52.     String fileName;
  53.     ZipEntry entry;
  54.     while((entry = in.getNextEntry()) != null ) {
  55.       fileName = entry.getName();
  56.       OutputStream out = getTempFileAsStream(fileName);
  57.       ServerUtils.copyStream(in, out, false);
  58.       out.close();
  59.     }
  60.     in.close();
  61.   }
  62.  
  63.   /**
  64.    * For JDK5/6, use Apache Commons Compress:
  65.    * http://commons.apache.org/compress/zip.html
  66.    */
  67.   @Test public void testAppacheCommonsCompressSupportZipFileContainingChineseNamedFiles()
  68.       throws IOException {
  69.  
  70.     ZipArchiveInputStream in = new ZipArchiveInputStream(getResourceAsStream("xls/sample.zip"), "gbk", true);
  71.  
  72.     ZipArchiveEntry entry;
  73.     while((entry = in.getNextZipEntry()) != null) {
  74.       String fileName = entry.getName();
  75.       OutputStream out = getTempFileAsStream(fileName);
  76.       ServerUtils.copyStream(in, out, false);
  77.       out.close();
  78.     }
  79.  
  80.     in.close();
  81.  
  82.   }
  83.  
  84. }

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>