blob: 481f3ca5cb95f9562f0b75b20da9e22563494108 [file] [log] [blame]
openvcdiff311c7142008-08-26 19:29:25 +00001// Copyright 2007 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Classes to implement an Encoder for the format described in
17// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19//
20// The RFC describes the possibility of using a secondary compressor
21// to further reduce the size of each section of the VCDIFF output.
22// That feature is not supported in this implementation of the encoder
23// and decoder.
24// No secondary compressor types have been publicly registered with
25// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26// in the more than five years since the registry was created, so there
27// is no standard set of compressor IDs which would be generated by other
28// encoders or accepted by other decoders.
29
30#include <config.h>
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000031#include <memory> // auto_ptr
openvcdiff311c7142008-08-26 19:29:25 +000032#include "checksum.h"
33#include "encodetable.h"
openvcdiff311c7142008-08-26 19:29:25 +000034#include "google/output_string.h"
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000035#include "google/vcencoder.h"
36#include "jsonwriter.h"
37#include "logging.h"
openvcdiff311c7142008-08-26 19:29:25 +000038#include "vcdiffengine.h"
39
40namespace open_vcdiff {
41
42HashedDictionary::HashedDictionary(const char* dictionary_contents,
43 size_t dictionary_size)
44 : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
45
46HashedDictionary::~HashedDictionary() { delete engine_; }
47
48bool HashedDictionary::Init() {
49 return const_cast<VCDiffEngine*>(engine_)->Init();
50}
51
52class VCDiffStreamingEncoderImpl {
53 public:
54 VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
55 VCDiffFormatExtensionFlags format_extensions,
56 bool look_for_target_matches);
57
58 // These functions are identical to their counterparts
59 // in VCDiffStreamingEncoder.
60 bool StartEncoding(OutputStringInterface* out);
61
62 bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
63
64 bool FinishEncoding(OutputStringInterface* out);
65
openvcdiff311c7142008-08-26 19:29:25 +000066 private:
openvcdiff311c7142008-08-26 19:29:25 +000067 const VCDiffEngine* engine_;
68
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000069 std::auto_ptr<CodeTableWriterInterface> coder_;
openvcdiff311c7142008-08-26 19:29:25 +000070
71 const VCDiffFormatExtensionFlags format_extensions_;
72
73 // Determines whether to look for matches within the previously encoded
74 // target data, or just within the source (dictionary) data. Please see
75 // vcencoder.h for a full explanation of this parameter.
76 const bool look_for_target_matches_;
77
78 // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
79 // and FinishEncoding() are called in the correct order. It will be true
80 // if StartEncoding() has been called, followed by zero or more calls to
81 // EncodeChunk(), but FinishEncoding() has not yet been called. It will
82 // be false initially, and also after FinishEncoding() has been called.
83 bool encode_chunk_allowed_;
84
85 // Making these private avoids implicit copy constructor & assignment operator
86 VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT
87 void operator=(const VCDiffStreamingEncoderImpl&);
88};
89
90inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
91 const HashedDictionary* dictionary,
92 VCDiffFormatExtensionFlags format_extensions,
93 bool look_for_target_matches)
94 : engine_(dictionary->engine()),
openvcdiff311c7142008-08-26 19:29:25 +000095 format_extensions_(format_extensions),
96 look_for_target_matches_(look_for_target_matches),
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000097 encode_chunk_allowed_(false) {
98 if (format_extensions & VCD_FORMAT_JSON) {
99 coder_.reset(new JSONCodeTableWriter());
100 } else {
101 // This implementation of the encoder uses the default
102 // code table. A VCDiffCodeTableWriter could also be constructed
103 // using a custom code table.
104 coder_.reset(new VCDiffCodeTableWriter(
105 (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
openvcdiff311c7142008-08-26 19:29:25 +0000106 }
openvcdiff311c7142008-08-26 19:29:25 +0000107}
108
109inline bool VCDiffStreamingEncoderImpl::StartEncoding(
110 OutputStringInterface* out) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000111 if (!coder_->Init(engine_->dictionary_size())) {
112 VCD_DFATAL << "Internal error: "
113 "Initialization of code table writer failed" << VCD_ENDL;
openvcdiffd1845782009-03-20 21:56:15 +0000114 return false;
115 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000116 coder_->WriteHeader(out, format_extensions_);
openvcdiff311c7142008-08-26 19:29:25 +0000117 encode_chunk_allowed_ = true;
118 return true;
119}
120
121inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
122 const char* data,
123 size_t len,
124 OutputStringInterface* out) {
125 if (!encode_chunk_allowed_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000126 VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000127 return false;
128 }
129 if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000130 coder_->AddChecksum(ComputeAdler32(data, len));
openvcdiff311c7142008-08-26 19:29:25 +0000131 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000132 engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
openvcdiff311c7142008-08-26 19:29:25 +0000133 return true;
134}
135
136inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000137 OutputStringInterface* out) {
openvcdiff311c7142008-08-26 19:29:25 +0000138 if (!encode_chunk_allowed_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000139 VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000140 return false;
141 }
142 encode_chunk_allowed_ = false;
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000143 coder_->FinishEncoding(out);
openvcdiff311c7142008-08-26 19:29:25 +0000144 return true;
145}
146
147VCDiffStreamingEncoder::VCDiffStreamingEncoder(
148 const HashedDictionary* dictionary,
149 VCDiffFormatExtensionFlags format_extensions,
150 bool look_for_target_matches)
151 : impl_(new VCDiffStreamingEncoderImpl(dictionary,
152 format_extensions,
153 look_for_target_matches)) { }
154
155VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
156
157bool VCDiffStreamingEncoder::StartEncodingToInterface(
158 OutputStringInterface* out) {
159 return impl_->StartEncoding(out);
160}
161
162bool VCDiffStreamingEncoder::EncodeChunkToInterface(
163 const char* data,
164 size_t len,
165 OutputStringInterface* out) {
166 return impl_->EncodeChunk(data, len, out);
167}
168
169bool VCDiffStreamingEncoder::FinishEncodingToInterface(
170 OutputStringInterface* out) {
171 return impl_->FinishEncoding(out);
172}
173
openvcdiff311c7142008-08-26 19:29:25 +0000174bool VCDiffEncoder::EncodeToInterface(const char* target_data,
175 size_t target_len,
176 OutputStringInterface* out) {
177 out->clear();
178 if (!encoder_) {
179 if (!dictionary_.Init()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000180 VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000181 return false;
182 }
183 encoder_ = new VCDiffStreamingEncoder(&dictionary_,
184 flags_,
185 look_for_target_matches_);
186 }
187 if (!encoder_->StartEncodingToInterface(out)) {
188 return false;
189 }
190 if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
191 return false;
192 }
193 return encoder_->FinishEncodingToInterface(out);
194}
195
196} // namespace open_vcdiff