blob: 7e6646fad63e51da4c0b5c537bacec84cec4729e [file] [log] [blame]
Lukas Zilkae7962cc2018-03-28 18:09:48 +02001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "zlib-utils.h"
18
19#include <memory>
20
21#include "util/base/logging.h"
Lukas Zilka434442d2018-04-25 11:38:51 +020022#include "util/flatbuffers.h"
Lukas Zilkae7962cc2018-03-28 18:09:48 +020023
24namespace libtextclassifier2 {
25
26std::unique_ptr<ZlibDecompressor> ZlibDecompressor::Instance() {
27 std::unique_ptr<ZlibDecompressor> result(new ZlibDecompressor());
28 if (!result->initialized_) {
29 result.reset();
30 }
31 return result;
32}
33
34ZlibDecompressor::ZlibDecompressor() {
35 memset(&stream_, 0, sizeof(stream_));
36 stream_.zalloc = Z_NULL;
37 stream_.zfree = Z_NULL;
38 initialized_ = (inflateInit(&stream_) == Z_OK);
39}
40
41ZlibDecompressor::~ZlibDecompressor() {
42 if (initialized_) {
43 inflateEnd(&stream_);
44 }
45}
46
47bool ZlibDecompressor::Decompress(const CompressedBuffer* compressed_buffer,
48 std::string* out) {
49 out->resize(compressed_buffer->uncompressed_size());
50 stream_.next_in =
51 reinterpret_cast<const Bytef*>(compressed_buffer->buffer()->Data());
52 stream_.avail_in = compressed_buffer->buffer()->Length();
53 stream_.next_out = reinterpret_cast<Bytef*>(const_cast<char*>(out->c_str()));
54 stream_.avail_out = compressed_buffer->uncompressed_size();
55 return (inflate(&stream_, Z_SYNC_FLUSH) == Z_OK);
56}
57
58std::unique_ptr<ZlibCompressor> ZlibCompressor::Instance() {
59 std::unique_ptr<ZlibCompressor> result(new ZlibCompressor());
60 if (!result->initialized_) {
61 result.reset();
62 }
63 return result;
64}
65
66ZlibCompressor::ZlibCompressor(int level, int tmp_buffer_size) {
67 memset(&stream_, 0, sizeof(stream_));
68 stream_.zalloc = Z_NULL;
69 stream_.zfree = Z_NULL;
70 buffer_size_ = tmp_buffer_size;
71 buffer_.reset(new Bytef[buffer_size_]);
72 initialized_ = (deflateInit(&stream_, level) == Z_OK);
73}
74
75ZlibCompressor::~ZlibCompressor() { deflateEnd(&stream_); }
76
77void ZlibCompressor::Compress(const std::string& uncompressed_content,
78 CompressedBufferT* out) {
79 out->uncompressed_size = uncompressed_content.size();
80 out->buffer.clear();
81 stream_.next_in =
82 reinterpret_cast<const Bytef*>(uncompressed_content.c_str());
83 stream_.avail_in = uncompressed_content.size();
84 stream_.next_out = buffer_.get();
85 stream_.avail_out = buffer_size_;
86 unsigned char* buffer_deflate_start_position =
87 reinterpret_cast<unsigned char*>(buffer_.get());
88 int status;
89 do {
90 // Deflate chunk-wise.
91 // Z_SYNC_FLUSH causes all pending output to be flushed, but doesn't
92 // reset the compression state.
93 // As we do not know how big the compressed buffer will be, we compress
94 // chunk wise and append the flushed content to the output string buffer.
95 // As we store the uncompressed size, we do not have to do this during
96 // decompression.
97 status = deflate(&stream_, Z_SYNC_FLUSH);
98 unsigned char* buffer_deflate_end_position =
99 reinterpret_cast<unsigned char*>(stream_.next_out);
100 if (buffer_deflate_end_position != buffer_deflate_start_position) {
101 out->buffer.insert(out->buffer.end(), buffer_deflate_start_position,
102 buffer_deflate_end_position);
103 stream_.next_out = buffer_deflate_start_position;
104 stream_.avail_out = buffer_size_;
105 } else {
106 break;
107 }
108 } while (status == Z_OK);
109}
110
111// Compress rule fields in the model.
112bool CompressModel(ModelT* model) {
113 std::unique_ptr<ZlibCompressor> zlib_compressor = ZlibCompressor::Instance();
114 if (!zlib_compressor) {
115 TC_LOG(ERROR) << "Cannot compress model.";
116 return false;
117 }
118
119 // Compress regex rules.
120 if (model->regex_model != nullptr) {
121 for (int i = 0; i < model->regex_model->patterns.size(); i++) {
122 RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get();
123 pattern->compressed_pattern.reset(new CompressedBufferT);
124 zlib_compressor->Compress(pattern->pattern,
125 pattern->compressed_pattern.get());
126 pattern->pattern.clear();
127 }
128 }
129
130 // Compress date-time rules.
131 if (model->datetime_model != nullptr) {
132 for (int i = 0; i < model->datetime_model->patterns.size(); i++) {
133 DatetimeModelPatternT* pattern = model->datetime_model->patterns[i].get();
134 for (int j = 0; j < pattern->regexes.size(); j++) {
135 DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get();
136 regex->compressed_pattern.reset(new CompressedBufferT);
137 zlib_compressor->Compress(regex->pattern,
138 regex->compressed_pattern.get());
139 regex->pattern.clear();
140 }
141 }
142 for (int i = 0; i < model->datetime_model->extractors.size(); i++) {
143 DatetimeModelExtractorT* extractor =
144 model->datetime_model->extractors[i].get();
145 extractor->compressed_pattern.reset(new CompressedBufferT);
146 zlib_compressor->Compress(extractor->pattern,
147 extractor->compressed_pattern.get());
148 extractor->pattern.clear();
149 }
150 }
151 return true;
152}
153
Lukas Zilka434442d2018-04-25 11:38:51 +0200154namespace {
155
156bool DecompressBuffer(const CompressedBufferT* compressed_pattern,
157 ZlibDecompressor* zlib_decompressor,
158 std::string* uncompressed_pattern) {
159 std::string packed_pattern =
160 PackFlatbuffer<CompressedBuffer>(compressed_pattern);
161 if (!zlib_decompressor->Decompress(
162 LoadAndVerifyFlatbuffer<CompressedBuffer>(packed_pattern),
163 uncompressed_pattern)) {
164 return false;
165 }
166 return true;
167}
168
169} // namespace
170
171bool DecompressModel(ModelT* model) {
172 std::unique_ptr<ZlibDecompressor> zlib_decompressor =
173 ZlibDecompressor::Instance();
174 if (!zlib_decompressor) {
175 TC_LOG(ERROR) << "Cannot initialize decompressor.";
176 return false;
177 }
178
179 // Decompress regex rules.
180 if (model->regex_model != nullptr) {
181 for (int i = 0; i < model->regex_model->patterns.size(); i++) {
182 RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get();
183 if (!DecompressBuffer(pattern->compressed_pattern.get(),
184 zlib_decompressor.get(), &pattern->pattern)) {
185 TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
186 return false;
187 }
188 pattern->compressed_pattern.reset(nullptr);
189 }
190 }
191
192 // Decompress date-time rules.
193 if (model->datetime_model != nullptr) {
194 for (int i = 0; i < model->datetime_model->patterns.size(); i++) {
195 DatetimeModelPatternT* pattern = model->datetime_model->patterns[i].get();
196 for (int j = 0; j < pattern->regexes.size(); j++) {
197 DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get();
198 if (!DecompressBuffer(regex->compressed_pattern.get(),
199 zlib_decompressor.get(), &regex->pattern)) {
200 TC_LOG(ERROR) << "Cannot decompress pattern: " << i << " " << j;
201 return false;
202 }
203 regex->compressed_pattern.reset(nullptr);
204 }
205 }
206 for (int i = 0; i < model->datetime_model->extractors.size(); i++) {
207 DatetimeModelExtractorT* extractor =
208 model->datetime_model->extractors[i].get();
209 if (!DecompressBuffer(extractor->compressed_pattern.get(),
210 zlib_decompressor.get(), &extractor->pattern)) {
211 TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
212 return false;
213 }
214 extractor->compressed_pattern.reset(nullptr);
215 }
216 }
217 return true;
218}
219
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200220std::string CompressSerializedModel(const std::string& model) {
221 std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str());
222 TC_CHECK(unpacked_model != nullptr);
223 TC_CHECK(CompressModel(unpacked_model.get()));
224 flatbuffers::FlatBufferBuilder builder;
225 FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
226 return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
227 builder.GetSize());
228}
229
230std::unique_ptr<UniLib::RegexPattern> UncompressMakeRegexPattern(
231 const UniLib& unilib, const flatbuffers::String* uncompressed_pattern,
Lukas Zilka434442d2018-04-25 11:38:51 +0200232 const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor,
233 std::string* result_pattern_text) {
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200234 UnicodeText unicode_regex_pattern;
235 std::string decompressed_pattern;
236 if (compressed_pattern != nullptr &&
237 compressed_pattern->buffer() != nullptr) {
238 if (decompressor == nullptr ||
239 !decompressor->Decompress(compressed_pattern, &decompressed_pattern)) {
240 TC_LOG(ERROR) << "Cannot decompress pattern.";
241 return nullptr;
242 }
243 unicode_regex_pattern =
244 UTF8ToUnicodeText(decompressed_pattern.data(),
245 decompressed_pattern.size(), /*do_copy=*/false);
246 } else {
247 if (uncompressed_pattern == nullptr) {
248 TC_LOG(ERROR) << "Cannot load uncompressed pattern.";
249 return nullptr;
250 }
251 unicode_regex_pattern =
252 UTF8ToUnicodeText(uncompressed_pattern->c_str(),
253 uncompressed_pattern->Length(), /*do_copy=*/false);
254 }
255
Lukas Zilka434442d2018-04-25 11:38:51 +0200256 if (result_pattern_text != nullptr) {
257 *result_pattern_text = unicode_regex_pattern.ToUTF8String();
258 }
259
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200260 std::unique_ptr<UniLib::RegexPattern> regex_pattern =
261 unilib.CreateRegexPattern(unicode_regex_pattern);
262 if (!regex_pattern) {
263 TC_LOG(ERROR) << "Could not create pattern: "
264 << unicode_regex_pattern.ToUTF8String();
265 }
266 return regex_pattern;
267}
268
269} // namespace libtextclassifier2