/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
// Functions to compress and decompress low-entropy entries in the model.
| 18 | |
| 19 | #ifndef LIBTEXTCLASSIFIER_ZLIB_UTILS_H_ |
| 20 | #define LIBTEXTCLASSIFIER_ZLIB_UTILS_H_ |
| 21 | |
#include <memory>
#include <string>

#include "model_generated.h"
#include "util/utf8/unilib.h"
#include "zlib.h"
| 27 | |
| 28 | namespace libtextclassifier2 { |
| 29 | |
| 30 | class ZlibDecompressor { |
| 31 | public: |
| 32 | static std::unique_ptr<ZlibDecompressor> Instance(); |
| 33 | ~ZlibDecompressor(); |
| 34 | |
| 35 | bool Decompress(const CompressedBuffer* compressed_buffer, std::string* out); |
| 36 | |
| 37 | private: |
| 38 | ZlibDecompressor(); |
| 39 | z_stream stream_; |
| 40 | bool initialized_; |
| 41 | }; |
| 42 | |
| 43 | class ZlibCompressor { |
| 44 | public: |
| 45 | static std::unique_ptr<ZlibCompressor> Instance(); |
| 46 | ~ZlibCompressor(); |
| 47 | |
| 48 | void Compress(const std::string& uncompressed_content, |
| 49 | CompressedBufferT* out); |
| 50 | |
| 51 | private: |
| 52 | explicit ZlibCompressor(int level = Z_BEST_COMPRESSION, |
| 53 | // Tmp. buffer size was set based on the current set |
| 54 | // of patterns to be compressed. |
| 55 | int tmp_buffer_size = 64 * 1024); |
| 56 | z_stream stream_; |
| 57 | std::unique_ptr<Bytef[]> buffer_; |
| 58 | unsigned int buffer_size_; |
| 59 | bool initialized_; |
| 60 | }; |
| 61 | |
| 62 | // Compresses regex and datetime rules in the model in place. |
| 63 | bool CompressModel(ModelT* model); |
| 64 | |
Lukas Zilka | 434442d | 2018-04-25 11:38:51 +0200 | [diff] [blame] | 65 | // Decompresses regex and datetime rules in the model in place. |
| 66 | bool DecompressModel(ModelT* model); |
| 67 | |
// Compresses the regex and datetime rules of a model that is passed in
// serialized form as `model`.
// NOTE(review): the returned string is presumably the re-serialized model
// with compressed rules -- confirm against the implementation.
std::string CompressSerializedModel(const std::string& model);
| 70 | |
| 71 | // Create and compile a regex pattern from optionally compressed pattern. |
| 72 | std::unique_ptr<UniLib::RegexPattern> UncompressMakeRegexPattern( |
| 73 | const UniLib& unilib, const flatbuffers::String* uncompressed_pattern, |
Lukas Zilka | 434442d | 2018-04-25 11:38:51 +0200 | [diff] [blame] | 74 | const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor, |
| 75 | std::string* result_pattern_text = nullptr); |
Lukas Zilka | e7962cc | 2018-03-28 18:09:48 +0200 | [diff] [blame] | 76 | |
| 77 | } // namespace libtextclassifier2 |
| 78 | |
| 79 | #endif // LIBTEXTCLASSIFIER_ZLIB_UTILS_H_ |