In the example below we build on the Reading a zip file from java using ZipInputStream page to provide basic filtering. This filtering is provided by the filteredExpandZipFile method taking a Predicate. Every ZipEntry is passed to the predicate, but only ones that match (predicate returns true) are included.
Note that the size of an entry cannot be accurately determined in all cases, so it is not safe to perform validation on this field.
package com.thecoderscorner.example.compression;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.temporal.ChronoField;
import java.util.Date;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * ZipFilteredReader shows an example of filtering one or more matching
 * files from a ZipInputStream. Instead of expanding the whole archive
 * this uses the Predicate interface to only expand matching entries.
 *
 * Files are output to the output directory passed to the constructor.
 * Entry names are resolved against that directory and any entry whose
 * resolved path would fall outside it is rejected rather than written.
 */
public class ZipFilteredReader
{
    private static final Logger LOGGER = Logger.getLogger("ZipReader");

    /** Size in bytes of the buffer used to copy entry data to disk. */
    private static final int BUFFER_SIZE = 2048;

    private final Path zipLocation;
    private final Path outputDirectory;

    /**
     * Here we create the ZipFilteredReader and configure it with a Predicate.
     * This predicate function is used to filter which files we want to copy
     * out of the zip file.
     *
     * @param args optional: args[0] is the zip file and args[1] the output
     *             directory; the example paths are used for any that are absent
     */
    public static void main(String[] args)
    {
        String zipFile = args.length > 0 ? args[0] : "c:/dev/temp/ziptest/output.zip";
        String outputDir = args.length > 1 ? args[1] : "c:/dev/temp/ziptest";
        ZipFilteredReader reader = new ZipFilteredReader(zipFile, outputDir);

        // We define a simple predicate that only extracts files ending in
        // .txt from the zip archive and pass it to the zip filter method.
        reader.filteredExpandZipFile(zipEntry -> zipEntry.getName().endsWith(".txt"));

        // Another example predicate that filters files last written in or
        // after year 2015. Uncomment below to try this filter.
        //reader.filteredExpandZipFile(zipEntry -> {
        //    // getLastModifiedTime() returns null when the entry has no timestamp
        //    if (zipEntry.getLastModifiedTime() == null) return false;
        //    // Instant does not support the YEAR field, so convert to a
        //    // zoned date-time before asking for the year.
        //    int yearModified = zipEntry.getLastModifiedTime().toInstant()
        //            .atZone(java.time.ZoneOffset.UTC).getYear();
        //    return yearModified >= 2015;
        //});
    }

    /**
     * Constructs the filtered zip reader passing in the zip file to
     * be expanded by filter and the output directory
     * @param zipLocation the zip file
     * @param outputDir the output directory
     */
    public ZipFilteredReader(String zipLocation, String outputDir) {
        this.zipLocation = Paths.get(zipLocation);
        this.outputDirectory = Paths.get(outputDir);
    }

    /**
     * This method iterates through all entries in the zip archive. Each
     * entry is checked against the predicate (filter) that is passed to
     * the method. If the filter returns true, the entry is expanded,
     * otherwise it is ignored.
     *
     * An IOException raised while reading the archive is logged and ends
     * the iteration; it is not propagated to the caller.
     *
     * @param filter the predicate used to compare each entry against
     */
    public void filteredExpandZipFile(Predicate<? super ZipEntry> filter) {
        // try-with-resources closes the zip stream on every exit path
        try (ZipInputStream stream = new ZipInputStream(new FileInputStream(zipLocation.toFile())))
        {
            LOGGER.info("Zip file: " + zipLocation.toFile().getName() + " has been opened");

            // we now iterate through all entries in the archive testing them
            // against the predicate filter that we passed in. Only items that
            // match the filter are expanded.
            ZipEntry entry;
            while ((entry = stream.getNextEntry()) != null)
            {
                if (filter.test(entry)) {
                    LOGGER.info("Matched file " + entry.getName());
                    extractFileFromArchive(stream, entry.getName());
                }
                else {
                    LOGGER.info("Skipping file: " + entry.getName());
                }
            }
        }
        catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Exception reading zip", ex);
        }
    }

    /**
     * We only get here when the stream is located on a zip entry.
     * Now we can read the data from the stream for this current
     * ZipEntry. Just like a normal input stream we continue reading
     * until read() reports end of stream by returning -1.
     *
     * A failure for this entry is logged and swallowed so that the caller
     * can carry on with the remaining entries.
     *
     * @param stream the zip stream, positioned on the entry to extract
     * @param outputName the entry name, used as the path below the output directory
     */
    private void extractFileFromArchive(ZipInputStream stream, String outputName) {
        try {
            Path target = resolveInsideOutputDirectory(outputName);

            // Names ending in '/' denote directories: create it, nothing to copy.
            if (outputName.endsWith("/")) {
                Files.createDirectories(target);
                return;
            }

            // Entries may live in sub-directories that do not exist yet.
            Path parent = target.getParent();
            if (parent != null) {
                Files.createDirectories(parent);
            }

            try (FileOutputStream output = new FileOutputStream(target.toFile())) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int len;
                while ((len = stream.read(buffer)) != -1)
                {
                    output.write(buffer, 0, len);
                }
            }
        }
        catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Exception writing file", e);
        }
    }

    /**
     * Resolves an entry name against the output directory and verifies that
     * the normalized result is still located inside that directory.
     *
     * @param entryName the name of the zip entry
     * @return the absolute, normalized path the entry should be written to
     * @throws IOException if the name is not a valid path or the resolved
     *         path lies outside the output directory (for example "../x")
     */
    private Path resolveInsideOutputDirectory(String entryName) throws IOException {
        Path base = outputDirectory.toAbsolutePath().normalize();
        Path target;
        try {
            target = base.resolve(entryName).normalize();
        }
        catch (InvalidPathException e) {
            throw new IOException("Zip entry name is not a valid path: " + entryName, e);
        }
        if (!target.startsWith(base)) {
            throw new IOException("Zip entry escapes the output directory: " + entryName);
        }
        return target;
    }
}
Listing of the zip file that we used:
$ unzip -l output.zip
Archive: output.zip
Length Date Time Name
--------- ---------- ----- ----
566 09-25-2014 15:21 LICENSE.txt
661 09-25-2014 15:21 NOTICE.txt
26 05-16-2015 17:18 output.log
--------- -------
1253 3 files
We can see that only the .txt files were output to the directory:
$ ls
LICENSE.txt NOTICE.txt output.zip
Lastly, here’s the log output
May 16, 2015 5:21:47 PM com.thecoderscorner.example.compression.ZipFilteredReader filteredExpandZipFile
INFO: Zip file: output.zip has been opened
May 16, 2015 5:21:47 PM com.thecoderscorner.example.compression.ZipFilteredReader filteredExpandZipFile
INFO: Matched file LICENSE.txt
May 16, 2015 5:21:47 PM com.thecoderscorner.example.compression.ZipFilteredReader filteredExpandZipFile
INFO: Matched file NOTICE.txt
May 16, 2015 5:21:47 PM com.thecoderscorner.example.compression.ZipFilteredReader filteredExpandZipFile
INFO: Skipping file: output.log