| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "zlib-utils.h" |
| |
| #include <memory> |
| |
| #include "util/base/logging.h" |
| #include "util/flatbuffers.h" |
| |
| namespace libtextclassifier2 { |
| |
| std::unique_ptr<ZlibDecompressor> ZlibDecompressor::Instance() { |
| std::unique_ptr<ZlibDecompressor> result(new ZlibDecompressor()); |
| if (!result->initialized_) { |
| result.reset(); |
| } |
| return result; |
| } |
| |
| ZlibDecompressor::ZlibDecompressor() { |
| memset(&stream_, 0, sizeof(stream_)); |
| stream_.zalloc = Z_NULL; |
| stream_.zfree = Z_NULL; |
| initialized_ = (inflateInit(&stream_) == Z_OK); |
| } |
| |
| ZlibDecompressor::~ZlibDecompressor() { |
| if (initialized_) { |
| inflateEnd(&stream_); |
| } |
| } |
| |
| bool ZlibDecompressor::Decompress(const CompressedBuffer* compressed_buffer, |
| std::string* out) { |
| out->resize(compressed_buffer->uncompressed_size()); |
| stream_.next_in = |
| reinterpret_cast<const Bytef*>(compressed_buffer->buffer()->Data()); |
| stream_.avail_in = compressed_buffer->buffer()->Length(); |
| stream_.next_out = reinterpret_cast<Bytef*>(const_cast<char*>(out->c_str())); |
| stream_.avail_out = compressed_buffer->uncompressed_size(); |
| return (inflate(&stream_, Z_SYNC_FLUSH) == Z_OK); |
| } |
| |
| std::unique_ptr<ZlibCompressor> ZlibCompressor::Instance() { |
| std::unique_ptr<ZlibCompressor> result(new ZlibCompressor()); |
| if (!result->initialized_) { |
| result.reset(); |
| } |
| return result; |
| } |
| |
| ZlibCompressor::ZlibCompressor(int level, int tmp_buffer_size) { |
| memset(&stream_, 0, sizeof(stream_)); |
| stream_.zalloc = Z_NULL; |
| stream_.zfree = Z_NULL; |
| buffer_size_ = tmp_buffer_size; |
| buffer_.reset(new Bytef[buffer_size_]); |
| initialized_ = (deflateInit(&stream_, level) == Z_OK); |
| } |
| |
| ZlibCompressor::~ZlibCompressor() { deflateEnd(&stream_); } |
| |
| void ZlibCompressor::Compress(const std::string& uncompressed_content, |
| CompressedBufferT* out) { |
| out->uncompressed_size = uncompressed_content.size(); |
| out->buffer.clear(); |
| stream_.next_in = |
| reinterpret_cast<const Bytef*>(uncompressed_content.c_str()); |
| stream_.avail_in = uncompressed_content.size(); |
| stream_.next_out = buffer_.get(); |
| stream_.avail_out = buffer_size_; |
| unsigned char* buffer_deflate_start_position = |
| reinterpret_cast<unsigned char*>(buffer_.get()); |
| int status; |
| do { |
| // Deflate chunk-wise. |
| // Z_SYNC_FLUSH causes all pending output to be flushed, but doesn't |
| // reset the compression state. |
| // As we do not know how big the compressed buffer will be, we compress |
| // chunk wise and append the flushed content to the output string buffer. |
| // As we store the uncompressed size, we do not have to do this during |
| // decompression. |
| status = deflate(&stream_, Z_SYNC_FLUSH); |
| unsigned char* buffer_deflate_end_position = |
| reinterpret_cast<unsigned char*>(stream_.next_out); |
| if (buffer_deflate_end_position != buffer_deflate_start_position) { |
| out->buffer.insert(out->buffer.end(), buffer_deflate_start_position, |
| buffer_deflate_end_position); |
| stream_.next_out = buffer_deflate_start_position; |
| stream_.avail_out = buffer_size_; |
| } else { |
| break; |
| } |
| } while (status == Z_OK); |
| } |
| |
| // Compress rule fields in the model. |
| bool CompressModel(ModelT* model) { |
| std::unique_ptr<ZlibCompressor> zlib_compressor = ZlibCompressor::Instance(); |
| if (!zlib_compressor) { |
| TC_LOG(ERROR) << "Cannot compress model."; |
| return false; |
| } |
| |
| // Compress regex rules. |
| if (model->regex_model != nullptr) { |
| for (int i = 0; i < model->regex_model->patterns.size(); i++) { |
| RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get(); |
| pattern->compressed_pattern.reset(new CompressedBufferT); |
| zlib_compressor->Compress(pattern->pattern, |
| pattern->compressed_pattern.get()); |
| pattern->pattern.clear(); |
| } |
| } |
| |
| // Compress date-time rules. |
| if (model->datetime_model != nullptr) { |
| for (int i = 0; i < model->datetime_model->patterns.size(); i++) { |
| DatetimeModelPatternT* pattern = model->datetime_model->patterns[i].get(); |
| for (int j = 0; j < pattern->regexes.size(); j++) { |
| DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get(); |
| regex->compressed_pattern.reset(new CompressedBufferT); |
| zlib_compressor->Compress(regex->pattern, |
| regex->compressed_pattern.get()); |
| regex->pattern.clear(); |
| } |
| } |
| for (int i = 0; i < model->datetime_model->extractors.size(); i++) { |
| DatetimeModelExtractorT* extractor = |
| model->datetime_model->extractors[i].get(); |
| extractor->compressed_pattern.reset(new CompressedBufferT); |
| zlib_compressor->Compress(extractor->pattern, |
| extractor->compressed_pattern.get()); |
| extractor->pattern.clear(); |
| } |
| } |
| return true; |
| } |
| |
| namespace { |
| |
| bool DecompressBuffer(const CompressedBufferT* compressed_pattern, |
| ZlibDecompressor* zlib_decompressor, |
| std::string* uncompressed_pattern) { |
| std::string packed_pattern = |
| PackFlatbuffer<CompressedBuffer>(compressed_pattern); |
| if (!zlib_decompressor->Decompress( |
| LoadAndVerifyFlatbuffer<CompressedBuffer>(packed_pattern), |
| uncompressed_pattern)) { |
| return false; |
| } |
| return true; |
| } |
| |
| } // namespace |
| |
| bool DecompressModel(ModelT* model) { |
| std::unique_ptr<ZlibDecompressor> zlib_decompressor = |
| ZlibDecompressor::Instance(); |
| if (!zlib_decompressor) { |
| TC_LOG(ERROR) << "Cannot initialize decompressor."; |
| return false; |
| } |
| |
| // Decompress regex rules. |
| if (model->regex_model != nullptr) { |
| for (int i = 0; i < model->regex_model->patterns.size(); i++) { |
| RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get(); |
| if (!DecompressBuffer(pattern->compressed_pattern.get(), |
| zlib_decompressor.get(), &pattern->pattern)) { |
| TC_LOG(ERROR) << "Cannot decompress pattern: " << i; |
| return false; |
| } |
| pattern->compressed_pattern.reset(nullptr); |
| } |
| } |
| |
| // Decompress date-time rules. |
| if (model->datetime_model != nullptr) { |
| for (int i = 0; i < model->datetime_model->patterns.size(); i++) { |
| DatetimeModelPatternT* pattern = model->datetime_model->patterns[i].get(); |
| for (int j = 0; j < pattern->regexes.size(); j++) { |
| DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get(); |
| if (!DecompressBuffer(regex->compressed_pattern.get(), |
| zlib_decompressor.get(), ®ex->pattern)) { |
| TC_LOG(ERROR) << "Cannot decompress pattern: " << i << " " << j; |
| return false; |
| } |
| regex->compressed_pattern.reset(nullptr); |
| } |
| } |
| for (int i = 0; i < model->datetime_model->extractors.size(); i++) { |
| DatetimeModelExtractorT* extractor = |
| model->datetime_model->extractors[i].get(); |
| if (!DecompressBuffer(extractor->compressed_pattern.get(), |
| zlib_decompressor.get(), &extractor->pattern)) { |
| TC_LOG(ERROR) << "Cannot decompress pattern: " << i; |
| return false; |
| } |
| extractor->compressed_pattern.reset(nullptr); |
| } |
| } |
| return true; |
| } |
| |
| std::string CompressSerializedModel(const std::string& model) { |
| std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str()); |
| TC_CHECK(unpacked_model != nullptr); |
| TC_CHECK(CompressModel(unpacked_model.get())); |
| flatbuffers::FlatBufferBuilder builder; |
| FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get())); |
| return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()), |
| builder.GetSize()); |
| } |
| |
| std::unique_ptr<UniLib::RegexPattern> UncompressMakeRegexPattern( |
| const UniLib& unilib, const flatbuffers::String* uncompressed_pattern, |
| const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor, |
| std::string* result_pattern_text) { |
| UnicodeText unicode_regex_pattern; |
| std::string decompressed_pattern; |
| if (compressed_pattern != nullptr && |
| compressed_pattern->buffer() != nullptr) { |
| if (decompressor == nullptr || |
| !decompressor->Decompress(compressed_pattern, &decompressed_pattern)) { |
| TC_LOG(ERROR) << "Cannot decompress pattern."; |
| return nullptr; |
| } |
| unicode_regex_pattern = |
| UTF8ToUnicodeText(decompressed_pattern.data(), |
| decompressed_pattern.size(), /*do_copy=*/false); |
| } else { |
| if (uncompressed_pattern == nullptr) { |
| TC_LOG(ERROR) << "Cannot load uncompressed pattern."; |
| return nullptr; |
| } |
| unicode_regex_pattern = |
| UTF8ToUnicodeText(uncompressed_pattern->c_str(), |
| uncompressed_pattern->Length(), /*do_copy=*/false); |
| } |
| |
| if (result_pattern_text != nullptr) { |
| *result_pattern_text = unicode_regex_pattern.ToUTF8String(); |
| } |
| |
| std::unique_ptr<UniLib::RegexPattern> regex_pattern = |
| unilib.CreateRegexPattern(unicode_regex_pattern); |
| if (!regex_pattern) { |
| TC_LOG(ERROR) << "Could not create pattern: " |
| << unicode_regex_pattern.ToUTF8String(); |
| } |
| return regex_pattern; |
| } |
| |
| } // namespace libtextclassifier2 |