Pass the size of the new file to the PatchWriterInterface::Init()
Most patch formats include the size of the new file in the header.
To help streaming the patch to disk while generating it, this CL
passes the size of the new file to the patch writer on initialization.
To do this, we also move the Init() call to the patch writer to the
DiffEncoder, which makes more sense since the Close() call is also made
from the DiffEnconder.
Bug: None
Test: Updated tests to check for this value.
Change-Id: Idfaedbd492d68ab6e6cb2c1cb3883947f068c3aa
diff --git a/bsdiff.cc b/bsdiff.cc
index f77f6ca..01cd721 100644
--- a/bsdiff.cc
+++ b/bsdiff.cc
@@ -82,9 +82,9 @@
}
/* Initialize the patch file encoder */
- if (!patch->Init())
- return 1;
DiffEncoder diff_encoder(patch, old_buf, oldsize, new_buf, newsize);
+ if (!diff_encoder.Init())
+ return 1;
/* Compute the differences, writing ctrl as we go */
scan=0;len=0;
diff --git a/diff_encoder.cc b/diff_encoder.cc
index bc89c2e..baa5565 100644
--- a/diff_encoder.cc
+++ b/diff_encoder.cc
@@ -21,6 +21,10 @@
namespace bsdiff {
+bool DiffEncoder::Init() {
+ return patch_->Init(new_size_);
+}
+
bool DiffEncoder::AddControlEntry(const ControlEntry& entry) {
if (entry.diff_size > kMaxEncodedUint64Value) {
LOG(ERROR) << "Encoding value out of range " << entry.diff_size << endl;
diff --git a/diff_encoder.h b/diff_encoder.h
index a479c81..3d992df 100644
--- a/diff_encoder.h
+++ b/diff_encoder.h
@@ -20,7 +20,7 @@
class DiffEncoder {
public:
// Initialize the DiffEncoder with the old and new file buffers, as well as
- // the path writer used. The |patch| must be already initialized.
+ // the path writer used. The |patch| will be initialized when calling Init().
DiffEncoder(PatchWriterInterface* patch,
const uint8_t* old_buf,
uint64_t old_size,
@@ -32,6 +32,9 @@
new_buf_(new_buf),
new_size_(new_size) {}
+ // Initialize the diff encoder and the underlying patch.
+ bool Init();
+
// Add a new control triplet entry to the patch. The |entry.diff_size| bytes
// for the diff stream and the |entry.extra_size| bytes for the extra stream
// will be computed and added to the corresponding streams in the patch.
diff --git a/diff_encoder_unittest.cc b/diff_encoder_unittest.cc
index e3b3747..15e4331 100644
--- a/diff_encoder_unittest.cc
+++ b/diff_encoder_unittest.cc
@@ -27,7 +27,6 @@
class DiffEncoderTest : public testing::Test {
protected:
void SetUp() {
- EXPECT_TRUE(fake_patch_.Init());
// By default, set the encoder to kHelloWorld to kHelloWorld.
diff_encoder_.reset(new DiffEncoder(&fake_patch_, kHelloWorld,
sizeof(kHelloWorld), kHelloWorld,
@@ -40,6 +39,7 @@
TEST_F(DiffEncoderTest, CreateEmptyPatchTest) {
diff_encoder_.reset(new DiffEncoder(&fake_patch_, nullptr, 0, nullptr, 0));
+ EXPECT_TRUE(diff_encoder_->Init());
EXPECT_TRUE(diff_encoder_->Close());
// Both diff and extra stream must be empty stream, and not control entries.
@@ -51,6 +51,7 @@
TEST_F(DiffEncoderTest, AllInExtraStreamTest) {
diff_encoder_.reset(new DiffEncoder(&fake_patch_, nullptr, 0, kHelloWorld,
sizeof(kHelloWorld)));
+ EXPECT_TRUE(diff_encoder_->Init());
// Write to the extra stream in two parts: first 5 bytes, then the rest.
EXPECT_TRUE(diff_encoder_->AddControlEntry(ControlEntry(0, 5, 0)));
@@ -66,6 +67,7 @@
}
TEST_F(DiffEncoderTest, AllInDiffStreamTest) {
+ EXPECT_TRUE(diff_encoder_->Init());
EXPECT_TRUE(
diff_encoder_->AddControlEntry(ControlEntry(sizeof(kHelloWorld), 0, 0)));
EXPECT_TRUE(diff_encoder_->Close());
@@ -76,6 +78,7 @@
}
TEST_F(DiffEncoderTest, OldPosNegativeErrorTest) {
+ EXPECT_TRUE(diff_encoder_->Init());
// Referencing negative values in oldpos is fine, until you use them.
EXPECT_TRUE(diff_encoder_->AddControlEntry(ControlEntry(0, 0, -5)));
EXPECT_TRUE(diff_encoder_->AddControlEntry(ControlEntry(0, 0, 2)));
@@ -84,6 +87,7 @@
// Test that using an oldpos past the end of the file fails.
TEST_F(DiffEncoderTest, OldPosTooBigErrorTest) {
+ EXPECT_TRUE(diff_encoder_->Init());
EXPECT_TRUE(
diff_encoder_->AddControlEntry(ControlEntry(0, 0, sizeof(kHelloWorld))));
EXPECT_FALSE(diff_encoder_->AddControlEntry(ControlEntry(1, 0, 0)));
@@ -92,6 +96,7 @@
// Test that diffing against a section of the old file past the end of the file
// fails.
TEST_F(DiffEncoderTest, OldPosPlusSizeTooBigErrorTest) {
+ EXPECT_TRUE(diff_encoder_->Init());
// The oldpos is set to a range inside the word, the we try to copy past the
// end of it.
EXPECT_TRUE(diff_encoder_->AddControlEntry(
@@ -101,6 +106,7 @@
}
TEST_F(DiffEncoderTest, ExtraStreamTooBigErrorTest) {
+ EXPECT_TRUE(diff_encoder_->Init());
EXPECT_TRUE(diff_encoder_->AddControlEntry(ControlEntry(3, 0, 0)));
// This writes too many bytes in the stream because we already have 3 bytes.
EXPECT_FALSE(
diff --git a/fake_patch_writer.h b/fake_patch_writer.h
index 4c99928..7ac5063 100644
--- a/fake_patch_writer.h
+++ b/fake_patch_writer.h
@@ -20,9 +20,10 @@
~FakePatchWriter() override = default;
// PatchWriterInterface overrides.
- bool Init() override {
+ bool Init(size_t new_size) override {
EXPECT_FALSE(initialized_);
initialized_ = true;
+ new_size_ = new_size;
return true;
}
@@ -53,6 +54,7 @@
const std::vector<ControlEntry>& entries() const { return entries_; }
const std::vector<uint8_t>& diff_stream() const { return diff_stream_; }
const std::vector<uint8_t>& extra_stream() const { return extra_stream_; }
+ size_t new_size() const { return new_size_; }
private:
// The list of ControlEntry passed to this class.
@@ -60,6 +62,9 @@
std::vector<uint8_t> diff_stream_;
std::vector<uint8_t> extra_stream_;
+ // The size of the new file for the patch we are writing.
+ size_t new_size_{0};
+
// Whether this class was initialized.
bool initialized_{false};
diff --git a/include/bsdiff/patch_writer_interface.h b/include/bsdiff/patch_writer_interface.h
index ce805fe..0f9adfb 100644
--- a/include/bsdiff/patch_writer_interface.h
+++ b/include/bsdiff/patch_writer_interface.h
@@ -16,8 +16,9 @@
public:
virtual ~PatchWriterInterface() = default;
- // Initialize the patch writer.
- virtual bool Init() = 0;
+ // Initialize the patch writer for a patch where the new file will have
+ // |new_size| bytes.
+ virtual bool Init(size_t new_size) = 0;
// Write the passed |data| buffer of length |size| to the Diff or Extra
// streams respectively. Each method can be called independently from each
diff --git a/patch_writer.cc b/patch_writer.cc
index e6eb8ad..06f11ad 100644
--- a/patch_writer.cc
+++ b/patch_writer.cc
@@ -27,7 +27,7 @@
namespace bsdiff {
-bool BsdiffPatchWriter::Init() {
+bool BsdiffPatchWriter::Init(size_t /* new_size */) {
fp_ = fopen(patch_filename_.c_str(), "w");
if (!fp_) {
LOG(ERROR) << "Opening " << patch_filename_ << endl;
diff --git a/patch_writer.h b/patch_writer.h
index de4c239..83da26c 100644
--- a/patch_writer.h
+++ b/patch_writer.h
@@ -23,7 +23,7 @@
: patch_filename_(patch_filename) {}
// PatchWriterInterface overrides.
- bool Init() override;
+ bool Init(size_t new_size) override;
bool WriteDiffStream(const uint8_t* data, size_t size) override;
bool WriteExtraStream(const uint8_t* data, size_t size) override;
bool AddControlEntry(const ControlEntry& entry) override;
diff --git a/patch_writer_unittest.cc b/patch_writer_unittest.cc
index 99d8f34..ab20388 100644
--- a/patch_writer_unittest.cc
+++ b/patch_writer_unittest.cc
@@ -46,7 +46,11 @@
class BsdiffPatchWriterTest : public testing::Test {
protected:
- void SetUp() override { EXPECT_TRUE(patch_writer_.Init()); }
+ void SetUp() override {
+ // This patch writer doesn't use the |new_size| value passed on init, so we
+ // don't pass any meaningful value.
+ EXPECT_TRUE(patch_writer_.Init(0));
+ }
test_utils::ScopedTempFile patch_file_{"bsdiff_newfile.XXXXXX"};
BsdiffPatchWriter patch_writer_{patch_file_.filename()};
diff --git a/split_patch_writer.cc b/split_patch_writer.cc
index 4123440..3e0d6e7 100644
--- a/split_patch_writer.cc
+++ b/split_patch_writer.cc
@@ -12,12 +12,24 @@
namespace bsdiff {
-bool SplitPatchWriter::Init() {
+bool SplitPatchWriter::Init(size_t new_size) {
+ new_size_ = new_size;
// Fail gracefully if re-initialized.
if (current_patch_ || patches_.empty())
return false;
- return patches_[0]->Init();
+ size_t expected_patches = (new_size_ + new_chunk_size_ - 1) / new_chunk_size_;
+ if (expected_patches == 0)
+ expected_patches = 1;
+ if (expected_patches != patches_.size()) {
+ LOG(ERROR) << "Expected " << expected_patches << " for a new file of size "
+ << new_size_ << " split in chunks of " << new_chunk_size_
+ << " but got " << patches_.size() << " instead." << endl;
+ return false;
+ }
+
+ return patches_[0]->Init(
+ std::min(static_cast<uint64_t>(new_size_), new_chunk_size_));
}
bool SplitPatchWriter::WriteDiffStream(const uint8_t* data, size_t size) {
@@ -61,7 +73,8 @@
LOG(ERROR) << "Writing past the last patch" << endl;
return false;
}
- if (!patches_[current_patch_]->Init()) {
+ if (!patches_[current_patch_]->Init(std::min(
+ new_size_ - current_patch_ * new_chunk_size_, new_chunk_size_))) {
LOG(ERROR) << "Failed to initialize patch " << current_patch_ << endl;
return false;
}
diff --git a/split_patch_writer.h b/split_patch_writer.h
index 5aaabce..395f999 100644
--- a/split_patch_writer.h
+++ b/split_patch_writer.h
@@ -33,7 +33,7 @@
// corresponding AddControlEntry() is not supported and will fail. The reason
// for this is because which underlying patch takes the bytes depends on the
// control entries.
- bool Init() override;
+ bool Init(size_t new_size) override;
bool WriteDiffStream(const uint8_t* data, size_t size) override;
bool WriteExtraStream(const uint8_t* data, size_t size) override;
bool AddControlEntry(const ControlEntry& entry) override;
@@ -53,6 +53,9 @@
const uint8_t* data,
size_t size);
+ // The size of the new file for the patch we are writing.
+ size_t new_size_{0};
+
// The size of each chunk of the new file written to.
uint64_t new_chunk_size_;
std::vector<PatchWriterInterface*> patches_;
diff --git a/split_patch_writer_unittest.cc b/split_patch_writer_unittest.cc
index 1170db9..a37bcdc 100644
--- a/split_patch_writer_unittest.cc
+++ b/split_patch_writer_unittest.cc
@@ -16,23 +16,33 @@
class SplitPatchWriterTest : public testing::Test {
protected:
- void SetUpForSize(size_t num_chunks, uint64_t new_chunk_size) {
+ void SetUpForSize(size_t num_chunks,
+ uint64_t new_chunk_size,
+ size_t new_size) {
fake_patches_.resize(num_chunks);
std::vector<PatchWriterInterface*> patches;
for (auto& fake_patch : fake_patches_)
patches.push_back(&fake_patch);
patch_writer_.reset(new SplitPatchWriter(new_chunk_size, patches));
- EXPECT_TRUE(patch_writer_->Init());
+ EXPECT_TRUE(patch_writer_->Init(new_size));
}
std::vector<FakePatchWriter> fake_patches_;
std::unique_ptr<SplitPatchWriter> patch_writer_;
};
+TEST_F(SplitPatchWriterTest, InvalidNumberOfPatchesForSizeTest) {
+ FakePatchWriter p;
+ std::vector<PatchWriterInterface*> patches = {&p};
+ patch_writer_.reset(new SplitPatchWriter(10, patches));
+ // We should have pass two patches.
+ EXPECT_FALSE(patch_writer_->Init(15));
+}
+
// A single empty patch is allowed.
TEST_F(SplitPatchWriterTest, NonSplitEmptyPatchTest) {
- SetUpForSize(1, 100);
+ SetUpForSize(1, 100, 0);
EXPECT_TRUE(patch_writer_->Close());
EXPECT_TRUE(fake_patches_[0].entries().empty());
@@ -40,13 +50,13 @@
// Leaving patches at the end that are empty is considered an error.
TEST_F(SplitPatchWriterTest, NotAllPatchesWrittenErrorTest) {
- SetUpForSize(2, 100);
+ SetUpForSize(2, 100, 200);
// We write less than the amount needed for two patches, which should fail.
EXPECT_FALSE(patch_writer_->Close());
}
TEST_F(SplitPatchWriterTest, MissingDiffBytesErrorTest) {
- SetUpForSize(2, 10);
+ SetUpForSize(2, 10, 20);
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 5, 0)));
std::vector<uint8_t> zeros(20, 0);
@@ -58,7 +68,7 @@
}
TEST_F(SplitPatchWriterTest, MissingExtraBytesErrorTest) {
- SetUpForSize(2, 10);
+ SetUpForSize(2, 10, 20);
std::vector<uint8_t> zeros(20, 0);
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 15, -5)));
@@ -72,7 +82,7 @@
// Test all sort of corner cases when splitting the ControlEntry across multiple
// patches
TEST_F(SplitPatchWriterTest, SplitControlAcrossSeveralPatchesTest) {
- SetUpForSize(4, 10);
+ SetUpForSize(4, 10, 40);
// The middle control entry would be split in tree different patches.
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 1, -5)));
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(4, 0, -4)));
@@ -116,6 +126,10 @@
ControlEntry(4, 0, 0), // (4, 0, -5) ignoring the offset.
}),
fake_patches_[3].entries());
+
+ for (size_t i = 0; i < fake_patches_.size(); ++i) {
+ EXPECT_EQ(10U, fake_patches_[i].new_size()) << "where i = " << i;
+ }
}
TEST_F(SplitPatchWriterTest, WriteStreamsAfterControlAcrossPatchesTest) {
@@ -123,7 +137,7 @@
for (size_t i = 0; i < numbers.size(); ++i)
numbers[i] = 'A' + i;
- SetUpForSize(4, 10);
+ SetUpForSize(4, 10, 40);
// The sequence is 15 diff, 10 extra, 15 diff.
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 10, 0)));
EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 0, 0)));