blob: e991d4a033d162fc474800f36fe200ff94d08b27 [file] [log] [blame]
Alex Deymo710b3da2017-10-26 13:13:28 +02001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_
6#define _BSDIFF_ENDSLEY_PATCH_WRITER_H_
7
Alex Deymo19fc5752018-02-15 16:56:39 +01008#include <memory>
Alex Deymo710b3da2017-10-26 13:13:28 +02009#include <string>
10#include <vector>
11
Alex Deymo19fc5752018-02-15 16:56:39 +010012#include "bsdiff/compressor_interface.h"
13#include "bsdiff/constants.h"
Alex Deymo710b3da2017-10-26 13:13:28 +020014#include "bsdiff/patch_writer_interface.h"
15
16namespace bsdiff {
17
18// A PatchWriterInterface class compatible with the format used by Android Play
19// Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff
20// implementation. See https://github.com/mendsley/bsdiff for the original
21// implementation of this format. See also Google's APK patch size estimator for
22// more information on the file-by-file format used by Play Store:
23// https://github.com/googlesamples/apk-patch-size-estimator
24
25// This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single
26// stream with the control entries, diff data and extra data interleaved. After
27// the header, each Control Entry is stored in 24 bytes followed by the diff
28// stream data for that entry only, and then followed by the extra stream data
// for that entry only. The format doesn't handle the compression of the data;
// instead, the whole file (including the magic string) is compressed with any
// compression algorithm.
32
// This format is easier to parse and allows the patch to be streamed, but
// because it mixes the diff and extra data into the same compression context
// it offers a slightly worse compression ratio (about 3.5% worse than
// upstream's format).
36
37class EndsleyPatchWriter : public PatchWriterInterface {
38 public:
39 // Create the patch writer that will write the data to the passed vector
40 // |patch|, resizing it as needed. The |patch| vector must be valid until
Alex Deymo19fc5752018-02-15 16:56:39 +010041 // Close() is called or this patch is destroyed. The data in |patch| will be
42 // compressed using the compressor type |type|.
43 EndsleyPatchWriter(std::vector<uint8_t>* patch,
44 CompressorType type,
Tianjie Xu2e70b552018-03-02 16:22:10 -080045 int brotli_quality)
46 : patch_(patch),
47 compressor_type_(type),
48 brotli_quality_(brotli_quality) {}
Alex Deymo710b3da2017-10-26 13:13:28 +020049
50 // PatchWriterInterface overrides.
51 bool Init(size_t new_size) override;
52 bool WriteDiffStream(const uint8_t* data, size_t size) override;
53 bool WriteExtraStream(const uint8_t* data, size_t size) override;
54 bool AddControlEntry(const ControlEntry& entry) override;
55 bool Close() override;
56
57 private:
58 // Emit at the end of the |patch_| vector the passed control entry.
59 void EmitControlEntry(const ControlEntry& entry);
60
61 // Emit at the end of the |patch_| vector the passed buffer.
62 void EmitBuffer(const uint8_t* data, size_t size);
63
64 // Flush as much as possible of the pending data.
65 void Flush();
66
67 // The vector we are writing to, owned by the caller.
68 std::vector<uint8_t>* patch_;
69
Alex Deymo19fc5752018-02-15 16:56:39 +010070 // The compressor type to use and its quality (if any).
71 CompressorType compressor_type_;
Tianjie Xu2e70b552018-03-02 16:22:10 -080072 int brotli_quality_;
Alex Deymo19fc5752018-02-15 16:56:39 +010073
74 std::unique_ptr<CompressorInterface> compressor_;
75
Alex Deymo710b3da2017-10-26 13:13:28 +020076 // The pending diff and extra data to be encoded in the file. These vectors
77 // would not be used whenever is possible to the data directly to the patch_
78 // vector; namely when the control, diff and extra stream data are provided in
79 // that order for each control entry.
80 std::vector<uint8_t> diff_data_;
81 std::vector<uint8_t> extra_data_;
82 std::vector<ControlEntry> control_;
83
84 // Defined as the sum of all the diff_size and extra_size values in
85 // |control_|. This is used to determine whether it is worth Flushing the
86 // pending data.
87 size_t pending_control_data_{0};
88
89 // Number of bytes in the diff and extra stream that are pending in the
90 // last control entry encoded in the |patch_|. If both are zero the last
91 // control entry was completely emitted.
92 size_t pending_diff_{0};
93 size_t pending_extra_{0};
94};
95
96} // namespace bsdiff
97
98#endif // _BSDIFF_ENDSLEY_PATCH_WRITER_H_