Add deflate compression to ZipWriter

Use zlib to deflate compress data into the zip file.

Change-Id: If094b926a67e199be976c3190781cd0101dd66e1
diff --git a/include/ziparchive/zip_writer.h b/include/ziparchive/zip_writer.h
index 9ee4abe..d996c4a 100644
--- a/include/ziparchive/zip_writer.h
+++ b/include/ziparchive/zip_writer.h
@@ -21,9 +21,11 @@
 #include <utils/Compat.h>
 
 #include <cstdio>
-#include <string>
 #include <ctime>
+#include <memory>
+#include <string>
 #include <vector>
+#include <zlib.h>
 
 /**
  * Writes a Zip file via a stateful interface.
@@ -112,8 +114,6 @@
 private:
   DISALLOW_COPY_AND_ASSIGN(ZipWriter);
 
-  int32_t HandleError(int32_t error_code);
-
   struct FileInfo {
     std::string path;
     uint16_t compression_method;
@@ -125,6 +125,12 @@
     uint32_t local_file_header_offset;
   };
 
+  int32_t HandleError(int32_t error_code);
+  int32_t PrepareDeflate();
+  int32_t StoreBytes(FileInfo* file, const void* data, size_t len);
+  int32_t CompressBytes(FileInfo* file, const void* data, size_t len);
+  int32_t FlushCompressedBytes(FileInfo* file);
+
   enum class State {
     kWritingZip,
     kWritingEntry,
@@ -136,6 +142,9 @@
   off64_t current_offset_;
   State state_;
   std::vector<FileInfo> files_;
+
+  std::unique_ptr<z_stream, void(*)(z_stream*)> z_stream_;
+  std::vector<uint8_t> buffer_;
 };
 
 #endif /* LIBZIPARCHIVE_ZIPWRITER_H_ */
diff --git a/libziparchive/Android.mk b/libziparchive/Android.mk
index 8ff94d4..8a4921f 100644
--- a/libziparchive/Android.mk
+++ b/libziparchive/Android.mk
@@ -18,9 +18,12 @@
 source_files := zip_archive.cc zip_writer.cc
 test_files := zip_archive_test.cc zip_writer_test.cc entry_name_utils_test.cc
 
+# ZLIB_CONST turns on const for input buffers, which is pretty standard.
+common_c_flags := -Werror -Wall -DZLIB_CONST
+
 # Incorrectly warns when C++11 empty brace {} initializer is used.
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61489
-common_cpp_flags := -Wno-missing-field-initializers
+common_cpp_flags := -Wold-style-cast -Wno-missing-field-initializers
 
 include $(CLEAR_VARS)
 LOCAL_CPP_EXTENSION := .cc
@@ -28,8 +31,8 @@
 LOCAL_STATIC_LIBRARIES := libz
 LOCAL_SHARED_LIBRARIES := libutils libbase
 LOCAL_MODULE:= libziparchive
-LOCAL_CFLAGS := -Werror -Wall
-LOCAL_CPPFLAGS := -Wold-style-cast $(common_cpp_flags)
+LOCAL_CFLAGS := $(common_c_flags)
+LOCAL_CPPFLAGS := $(common_cpp_flags)
 include $(BUILD_STATIC_LIBRARY)
 
 include $(CLEAR_VARS)
@@ -37,7 +40,7 @@
 LOCAL_SRC_FILES := ${source_files}
 LOCAL_STATIC_LIBRARIES := libz libutils libbase
 LOCAL_MODULE:= libziparchive-host
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := $(common_c_flags)
 LOCAL_CFLAGS_windows := -mno-ms-bitfields
 LOCAL_CPPFLAGS := $(common_cpp_flags)
 
@@ -51,7 +54,7 @@
 LOCAL_STATIC_LIBRARIES := libutils
 LOCAL_SHARED_LIBRARIES := libz-host liblog libbase
 LOCAL_MODULE:= libziparchive-host
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := $(common_c_flags)
 LOCAL_CPPFLAGS := $(common_cpp_flags)
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_SHARED_LIBRARY)
@@ -60,7 +63,7 @@
 include $(CLEAR_VARS)
 LOCAL_MODULE := ziparchive-tests
 LOCAL_CPP_EXTENSION := .cc
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := $(common_c_flags)
 LOCAL_CPPFLAGS := $(common_cpp_flags)
 LOCAL_SRC_FILES := $(test_files)
 LOCAL_SHARED_LIBRARIES := liblog libbase
@@ -70,7 +73,7 @@
 include $(CLEAR_VARS)
 LOCAL_MODULE := ziparchive-tests-host
 LOCAL_CPP_EXTENSION := .cc
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := $(common_c_flags)
 LOCAL_CPPFLAGS := -Wno-unnamed-type-template-args $(common_cpp_flags)
 LOCAL_SRC_FILES := $(test_files)
 LOCAL_SHARED_LIBRARIES := libziparchive-host liblog libbase
diff --git a/libziparchive/zip_writer.cc b/libziparchive/zip_writer.cc
index de75d1e..21662ec 100644
--- a/libziparchive/zip_writer.cc
+++ b/libziparchive/zip_writer.cc
@@ -18,10 +18,13 @@
 #include "zip_archive_common.h"
 #include "ziparchive/zip_writer.h"
 
+#include <utils/Log.h>
+
 #include <cassert>
 #include <cstdio>
 #include <memory>
 #include <zlib.h>
+#define DEF_MEM_LEVEL 8                // normally in zutil.h?
 
 /* Zip compression methods we support */
 enum {
@@ -29,6 +32,9 @@
   kCompressDeflated   = 8,        // standard deflate
 };
 
+// Size of the output buffer used for compression.
+static const size_t kBufSize = 32768u;
+
 // No error, operation completed successfully.
 static const int32_t kNoError = 0;
 
@@ -41,10 +47,14 @@
 // The zip entry name was invalid.
 static const int32_t kInvalidEntryName = -3;
 
+// An error occurred in zlib.
+static const int32_t kZlibError = -4;
+
 static const char* sErrorCodes[] = {
     "Invalid state",
     "IO error",
     "Invalid entry name",
+    "Zlib error",
 };
 
 const char* ZipWriter::ErrorCodeString(int32_t error_code) {
@@ -54,13 +64,21 @@
   return nullptr;
 }
 
-ZipWriter::ZipWriter(FILE* f) : file_(f), current_offset_(0), state_(State::kWritingZip) {
+static void DeleteZStream(z_stream* stream) {
+  deflateEnd(stream);
+  delete stream;
+}
+
+ZipWriter::ZipWriter(FILE* f) : file_(f), current_offset_(0), state_(State::kWritingZip),
+                                z_stream_(nullptr, DeleteZStream), buffer_(kBufSize) {
 }
 
 ZipWriter::ZipWriter(ZipWriter&& writer) : file_(writer.file_),
                                            current_offset_(writer.current_offset_),
                                            state_(writer.state_),
-                                           files_(std::move(writer.files_)) {
+                                           files_(std::move(writer.files_)),
+                                           z_stream_(std::move(writer.z_stream_)),
+                                           buffer_(std::move(writer.buffer_)){
   writer.file_ = nullptr;
   writer.state_ = State::kError;
 }
@@ -70,6 +88,8 @@
   current_offset_ = writer.current_offset_;
   state_ = writer.state_;
   files_ = std::move(writer.files_);
+  z_stream_ = std::move(writer.z_stream_);
+  buffer_ = std::move(writer.buffer_);
   writer.file_ = nullptr;
   writer.state_ = State::kError;
   return *this;
@@ -77,6 +97,7 @@
 
 int32_t ZipWriter::HandleError(int32_t error_code) {
   state_ = State::kError;
+  z_stream_.reset();
   return error_code;
 }
 
@@ -126,8 +147,16 @@
   // containing the crc and size fields.
   header.gpb_flags |= kGPBDDFlagMask;
 
-  // For now, ignore the ZipWriter::kCompress flag.
-  fileInfo.compression_method = kCompressStored;
+  if (flags & ZipWriter::kCompress) {
+    fileInfo.compression_method = kCompressDeflated;
+
+    int32_t result = PrepareDeflate();
+    if (result != kNoError) {
+      return result;
+    }
+  } else {
+    fileInfo.compression_method = kCompressStored;
+  }
   header.compression_method = fileInfo.compression_method;
 
   ExtractTimeAndDate(time, &fileInfo.last_mod_time, &fileInfo.last_mod_date);
@@ -163,40 +192,138 @@
   return kNoError;
 }
 
+int32_t ZipWriter::PrepareDeflate() {
+  assert(state_ == State::kWritingZip);
+
+  // Initialize the z_stream for compression.
+  z_stream_ = std::unique_ptr<z_stream, void(*)(z_stream*)>(new z_stream(), DeleteZStream);
+
+  int zerr = deflateInit2(z_stream_.get(), Z_BEST_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
+                          DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+  if (zerr != Z_OK) {
+    if (zerr == Z_VERSION_ERROR) {
+      ALOGE("Installed zlib is not compatible with linked version (%s)", ZLIB_VERSION);
+      return HandleError(kZlibError);
+    } else {
+      ALOGE("deflateInit2 failed (zerr=%d)", zerr);
+      return HandleError(kZlibError);
+    }
+  }
+
+  z_stream_->next_out = buffer_.data();
+  z_stream_->avail_out = buffer_.size();
+  return kNoError;
+}
+
 int32_t ZipWriter::WriteBytes(const void* data, size_t len) {
   if (state_ != State::kWritingEntry) {
     return HandleError(kInvalidState);
   }
 
   FileInfo& currentFile = files_.back();
+  int32_t result = kNoError;
   if (currentFile.compression_method & kCompressDeflated) {
-    // TODO(adamlesinski): Implement compression using zlib deflate.
-    assert(false);
+    result = CompressBytes(&currentFile, data, len);
   } else {
-    if (fwrite(data, 1, len, file_) != len) {
-      return HandleError(kIoError);
-    }
-    currentFile.crc32 = crc32(currentFile.crc32, reinterpret_cast<const Bytef*>(data), len);
-    currentFile.compressed_size += len;
-    current_offset_ += len;
+    result = StoreBytes(&currentFile, data, len);
   }
 
+  if (result != kNoError) {
+    return result;
+  }
+
+  currentFile.crc32 = crc32(currentFile.crc32, reinterpret_cast<const Bytef*>(data), len);
   currentFile.uncompressed_size += len;
   return kNoError;
 }
 
+int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) {
+  assert(state_ == State::kWritingEntry);
+
+  if (fwrite(data, 1, len, file_) != len) {
+    return HandleError(kIoError);
+  }
+  file->compressed_size += len;
+  current_offset_ += len;
+  return kNoError;
+}
+
+int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) {
+  assert(state_ == State::kWritingEntry);
+  assert(z_stream_);
+  assert(z_stream_->next_out != nullptr);
+  assert(z_stream_->avail_out != 0);
+
+  // Prepare the input.
+  z_stream_->next_in = reinterpret_cast<const uint8_t*>(data);
+  z_stream_->avail_in = len;
+
+  while (z_stream_->avail_in > 0) {
+    // We have more data to compress.
+    int zerr = deflate(z_stream_.get(), Z_NO_FLUSH);
+    if (zerr != Z_OK) {
+      return HandleError(kZlibError);
+    }
+
+    if (z_stream_->avail_out == 0) {
+      // The output is full, let's write it to disk.
+      size_t dataToWrite = z_stream_->next_out - buffer_.data();
+      if (fwrite(buffer_.data(), 1, dataToWrite, file_) != dataToWrite) {
+        return HandleError(kIoError);
+      }
+      file->compressed_size += dataToWrite;
+      current_offset_ += dataToWrite;
+
+      // Reset the output buffer for the next input.
+      z_stream_->next_out = buffer_.data();
+      z_stream_->avail_out = buffer_.size();
+    }
+  }
+  return kNoError;
+}
+
+int32_t ZipWriter::FlushCompressedBytes(FileInfo* file) {
+  assert(state_ == State::kWritingEntry);
+  assert(z_stream_);
+  assert(z_stream_->next_out != nullptr);
+  assert(z_stream_->avail_out != 0);
+
+  int zerr = deflate(z_stream_.get(), Z_FINISH);
+  if (zerr != Z_STREAM_END) {
+    return HandleError(kZlibError);
+  }
+
+  size_t dataToWrite = z_stream_->next_out - buffer_.data();
+  if (dataToWrite != 0) {
+    if (fwrite(buffer_.data(), 1, dataToWrite, file_) != dataToWrite) {
+      return HandleError(kIoError);
+    }
+    file->compressed_size += dataToWrite;
+    current_offset_ += dataToWrite;
+  }
+  z_stream_.reset();
+  return kNoError;
+}
+
 int32_t ZipWriter::FinishEntry() {
   if (state_ != State::kWritingEntry) {
     return kInvalidState;
   }
 
+  FileInfo& currentFile = files_.back();
+  if (currentFile.compression_method & kCompressDeflated) {
+    int32_t result = FlushCompressedBytes(&currentFile);
+    if (result != kNoError) {
+      return result;
+    }
+  }
+
   const uint32_t sig = DataDescriptor::kOptSignature;
   if (fwrite(&sig, sizeof(sig), 1, file_) != 1) {
     state_ = State::kError;
     return kIoError;
   }
 
-  FileInfo& currentFile = files_.back();
   DataDescriptor dd = {};
   dd.crc32 = currentFile.crc32;
   dd.compressed_size = currentFile.compressed_size;
diff --git a/libziparchive/zip_writer_test.cc b/libziparchive/zip_writer_test.cc
index 5269730..046f195 100644
--- a/libziparchive/zip_writer_test.cc
+++ b/libziparchive/zip_writer_test.cc
@@ -44,26 +44,26 @@
 
   const char* expected = "hello";
 
-  ASSERT_EQ(writer.StartEntry("file.txt", 0), 0);
-  ASSERT_EQ(writer.WriteBytes("he", 2), 0);
-  ASSERT_EQ(writer.WriteBytes("llo", 3), 0);
-  ASSERT_EQ(writer.FinishEntry(), 0);
-  ASSERT_EQ(writer.Finish(), 0);
+  ASSERT_EQ(0, writer.StartEntry("file.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes("he", 2));
+  ASSERT_EQ(0, writer.WriteBytes("llo", 3));
+  ASSERT_EQ(0, writer.FinishEntry());
+  ASSERT_EQ(0, writer.Finish());
 
-  ASSERT_GE(lseek(fd_, 0, SEEK_SET), 0);
+  ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
 
   ZipArchiveHandle handle;
-  ASSERT_EQ(OpenArchiveFd(fd_, "temp", &handle, false), 0);
+  ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
 
   ZipEntry data;
-  ASSERT_EQ(FindEntry(handle, ZipString("file.txt"), &data), 0);
-  EXPECT_EQ(data.compressed_length, strlen(expected));
-  EXPECT_EQ(data.uncompressed_length, strlen(expected));
-  EXPECT_EQ(data.method, kCompressStored);
+  ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
+  EXPECT_EQ(strlen(expected), data.compressed_length);
+  EXPECT_EQ(strlen(expected), data.uncompressed_length);
+  EXPECT_EQ(kCompressStored, data.method);
 
   char buffer[6];
-  EXPECT_EQ(ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(&buffer), sizeof(buffer)),
-            0);
+  EXPECT_EQ(0,
+            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(&buffer), sizeof(buffer)));
   buffer[5] = 0;
 
   EXPECT_STREQ(expected, buffer);
@@ -74,49 +74,49 @@
 TEST_F(zipwriter, WriteUncompressedZipWithMultipleFiles) {
   ZipWriter writer(file_);
 
-  ASSERT_EQ(writer.StartEntry("file.txt", 0), 0);
-  ASSERT_EQ(writer.WriteBytes("he", 2), 0);
-  ASSERT_EQ(writer.FinishEntry(), 0);
+  ASSERT_EQ(0, writer.StartEntry("file.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes("he", 2));
+  ASSERT_EQ(0, writer.FinishEntry());
 
-  ASSERT_EQ(writer.StartEntry("file/file.txt", 0), 0);
-  ASSERT_EQ(writer.WriteBytes("llo", 3), 0);
-  ASSERT_EQ(writer.FinishEntry(), 0);
+  ASSERT_EQ(0, writer.StartEntry("file/file.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes("llo", 3));
+  ASSERT_EQ(0, writer.FinishEntry());
 
-  ASSERT_EQ(writer.StartEntry("file/file2.txt", 0), 0);
-  ASSERT_EQ(writer.FinishEntry(), 0);
+  ASSERT_EQ(0, writer.StartEntry("file/file2.txt", 0));
+  ASSERT_EQ(0, writer.FinishEntry());
 
-  ASSERT_EQ(writer.Finish(), 0);
+  ASSERT_EQ(0, writer.Finish());
 
-  ASSERT_GE(lseek(fd_, 0, SEEK_SET), 0);
+  ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
 
   ZipArchiveHandle handle;
-  ASSERT_EQ(OpenArchiveFd(fd_, "temp", &handle, false), 0);
+  ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
 
   char buffer[4];
   ZipEntry data;
 
-  ASSERT_EQ(FindEntry(handle, ZipString("file.txt"), &data), 0);
-  EXPECT_EQ(data.method, kCompressStored);
-  EXPECT_EQ(data.compressed_length, 2u);
-  EXPECT_EQ(data.uncompressed_length, 2u);
-  ASSERT_EQ(ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)),
-            0);
+  ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
+  EXPECT_EQ(kCompressStored, data.method);
+  EXPECT_EQ(2u, data.compressed_length);
+  EXPECT_EQ(2u, data.uncompressed_length);
+  ASSERT_EQ(0,
+            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
   buffer[2] = 0;
   EXPECT_STREQ("he", buffer);
 
-  ASSERT_EQ(FindEntry(handle, ZipString("file/file.txt"), &data), 0);
-  EXPECT_EQ(data.method, kCompressStored);
-  EXPECT_EQ(data.compressed_length, 3u);
-  EXPECT_EQ(data.uncompressed_length, 3u);
-  ASSERT_EQ(ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)),
-            0);
+  ASSERT_EQ(0, FindEntry(handle, ZipString("file/file.txt"), &data));
+  EXPECT_EQ(kCompressStored, data.method);
+  EXPECT_EQ(3u, data.compressed_length);
+  EXPECT_EQ(3u, data.uncompressed_length);
+  ASSERT_EQ(0,
+            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
   buffer[3] = 0;
   EXPECT_STREQ("llo", buffer);
 
-  ASSERT_EQ(FindEntry(handle, ZipString("file/file2.txt"), &data), 0);
-  EXPECT_EQ(data.method, kCompressStored);
-  EXPECT_EQ(data.compressed_length, 0u);
-  EXPECT_EQ(data.uncompressed_length, 0u);
+  ASSERT_EQ(0, FindEntry(handle, ZipString("file/file2.txt"), &data));
+  EXPECT_EQ(kCompressStored, data.method);
+  EXPECT_EQ(0u, data.compressed_length);
+  EXPECT_EQ(0u, data.uncompressed_length);
 
   CloseArchive(handle);
 }
@@ -124,17 +124,47 @@
 TEST_F(zipwriter, WriteUncompressedZipWithAlignedFile) {
   ZipWriter writer(file_);
 
-  ASSERT_EQ(writer.StartEntry("align.txt", ZipWriter::kAlign32), 0);
-  ASSERT_EQ(writer.WriteBytes("he", 2), 0);
-  ASSERT_EQ(writer.FinishEntry(), 0);
-  ASSERT_EQ(writer.Finish(), 0);
+  ASSERT_EQ(0, writer.StartEntry("align.txt", ZipWriter::kAlign32));
+  ASSERT_EQ(0, writer.WriteBytes("he", 2));
+  ASSERT_EQ(0, writer.FinishEntry());
+  ASSERT_EQ(0, writer.Finish());
 
-  ASSERT_GE(lseek(fd_, 0, SEEK_SET), 0);
+  ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
 
   ZipArchiveHandle handle;
-  ASSERT_EQ(OpenArchiveFd(fd_, "temp", &handle, false), 0);
+  ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
 
   ZipEntry data;
-  ASSERT_EQ(FindEntry(handle, ZipString("align.txt"), &data), 0);
-  EXPECT_EQ(data.offset & 0x03, 0);
+  ASSERT_EQ(0, FindEntry(handle, ZipString("align.txt"), &data));
+  EXPECT_EQ(0, data.offset & 0x03);
+
+  CloseArchive(handle);
+}
+
+TEST_F(zipwriter, WriteCompressedZipWithOneFile) {
+  ZipWriter writer(file_);
+
+  ASSERT_EQ(0, writer.StartEntry("file.txt", ZipWriter::kCompress));
+  ASSERT_EQ(0, writer.WriteBytes("helo", 4));
+  ASSERT_EQ(0, writer.FinishEntry());
+  ASSERT_EQ(0, writer.Finish());
+
+  ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
+
+  ZipArchiveHandle handle;
+  ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
+
+  ZipEntry data;
+  ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
+  EXPECT_EQ(kCompressDeflated, data.method);
+  EXPECT_EQ(4u, data.uncompressed_length);
+
+  char buffer[5];
+  ASSERT_EQ(0,
+            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
+  buffer[4] = 0;
+
+  EXPECT_STREQ("helo", buffer);
+
+  CloseArchive(handle);
 }