blob: cf4c9cd54ffe8a5778ed314fc11eec750df195e1 [file] [log] [blame]
Reid Spencer460eb632004-10-04 10:49:41 +00001//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the llvm::Compressor class, an abstraction for memory
11// block compression.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include "bzip2/bzlib.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <new>
#include <ostream>
#include <string>
Reid Spencer460eb632004-10-04 10:49:41 +000021
Reid Spencer460eb632004-10-04 10:49:41 +000022namespace {
23
/// One-character tags identifying how a block was encoded. The compress
/// routine in this file writes the tag as the first output byte and the
/// decompress routine switches on it.
enum CompressionTypes {
  COMP_TYPE_NONE  = '0',  ///< Payload is a verbatim copy of the input
  COMP_TYPE_BZIP2 = '2'   ///< Payload is a bzip2 stream
};
28
Reid Spencer460eb632004-10-04 10:49:41 +000029inline int getdata(char*& buffer, unsigned& size,
Reid Spencer469c34b2004-10-04 17:29:25 +000030 llvm::Compressor::OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +000031 buffer = 0;
32 size = 0;
Reid Spencer469c34b2004-10-04 17:29:25 +000033 int result = (*cb)(buffer, size, context);
Reid Spencer460eb632004-10-04 10:49:41 +000034 assert(buffer != 0 && "Invalid result from Compressor callback");
35 assert(size != 0 && "Invalid result from Compressor callback");
36 return result;
37}
38
//===----------------------------------------------------------------------===//
//=== NULLCOMP - a compression like set of routines that just copies data
//=== without doing any compression. It presents the same kind of streaming
//=== interface (next_in/avail_in/next_out/avail_out) as a real compression
//=== library so that data can be stored verbatim, albeit using more
//=== data/memory.
//===----------------------------------------------------------------------===//

/// State shared between the caller and the NULLCOMP routines.
struct NULLCOMP_stream {
  // User provided fields
  char* next_in;          // Next input byte to be consumed
  unsigned avail_in;      // Number of input bytes remaining at next_in
  char* next_out;         // Where the next output byte will be stored
  unsigned avail_out;     // Number of bytes of space remaining at next_out

  // Information fields
  uint64_t output_count;  // Total count of output bytes
};

/// Reset the running output total. The user provided fields are deliberately
/// left alone because callers fill in next_in/avail_in before calling this.
void NULLCOMP_init(NULLCOMP_stream* s) {
  s->output_count = 0;
}

/// Copy as many bytes as both the input and the output block allow, then
/// advance both cursors and the running total.
/// @returns true if all remaining input was consumed, false if the output
///          block filled up first and a fresh one is needed to continue.
static bool NULLCOMP_copy(NULLCOMP_stream* s) {
  assert(s && "Invalid NULLCOMP_stream");
  assert(s->next_in != 0);
  assert(s->next_out != 0);
  assert(s->avail_in >= 1);
  assert(s->avail_out >= 1);

  const bool all_input_fits = s->avail_out >= s->avail_in;
  const unsigned count = all_input_fits ? s->avail_in : s->avail_out;

  ::memcpy(s->next_out, s->next_in, count);
  s->output_count += count;

  s->next_in += count;
  s->avail_in -= count;

  // Keep the output cursor consistent with avail_out so that the stream
  // describes the unused tail of the block rather than its beginning.
  s->next_out += count;
  s->avail_out -= count;

  return all_input_fits;
}

/// "Compress" by copying input to output.
/// @returns true when all input has been consumed, false when the caller must
///          supply a new output block and call again.
bool NULLCOMP_compress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// "Decompress" by copying input to output; the exact inverse of
/// NULLCOMP_compress, which for a plain copy is the same operation.
/// @returns true when all input has been consumed, false when the caller must
///          supply a new output block and call again.
bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// Counterpart of NULLCOMP_init. The stream owns no resources, so there is
/// nothing to release.
void NULLCOMP_end(NULLCOMP_stream* /*strm*/) {
}
111
/// This structure is used when output is collected into a single heap block.
/// As data is being produced, the memory buffer might need to be
/// reallocated. The buffer allocation is handled in a callback and this
/// structure is needed to retain information across calls to the callback.
///
/// The block is obtained with realloc and is NOT released by this structure;
/// whoever ends up holding \c buff is responsible for passing it to free.
/// @brief An internal buffer object used for handling (de)compression
struct BufferContext {
  char* buff;     ///< Base of the block, null until the first callback
  unsigned size;  ///< Current size estimate; the next callback doubles it

  explicit BufferContext(unsigned compressedSize) : buff(0), size(0) {
    // Compute the initial length of the uncompression buffer. Note that this
    // is twice the length of the compressed buffer and will be doubled again
    // in the callback for an initial allocation of 4x compressedSize. This
    // calculation is based on the typical compression ratio of bzip2 on LLVM
    // bytecode files which typically ranges in the 50%-75% range. Since we
    // typically get at least 50%, doubling is insufficient. By using a 4x
    // multiplier on the first allocation, we minimize the impact of having to
    // copy the buffer on reallocation.
    const unsigned half_max = static_cast<unsigned>(-1) / 2;
    if (compressedSize == 0)
      size = 1;                   // never start from a zero-length estimate
    else if (compressedSize > half_max)
      size = half_max;            // saturate instead of wrapping around
    else
      size = compressedSize * 2;
  }

  /// This function handles allocation of the buffer used to collect output.
  /// Each call doubles the block. On the first call the whole block is handed
  /// out; on later calls only the newly added upper half is handed out, since
  /// the lower half already holds data.
  ///
  /// @param buff Receives the start of the region that may be written
  /// @param sz   Receives the number of bytes available at \p buff
  /// @param ctxt Must point to the BufferContext being grown
  /// @returns 0 on success. On failure returns 1 with \p buff null and \p sz
  ///          zero; the context (and any block it already holds) is left
  ///          untouched.
  static unsigned callback(char*& buff, unsigned& sz, void* ctxt) {
    // Cast the context variable to our BufferContext
    BufferContext* bc = static_cast<BufferContext*>(ctxt);

    buff = 0;
    sz = 0;

    // Refuse to double a size that would wrap around (or that is zero):
    // either would produce a block smaller than the data it must hold.
    const unsigned half_max = static_cast<unsigned>(-1) / 2;
    if (bc->size == 0 || bc->size > half_max)
      return 1;

    // Compute the new, doubled, size of the block
    const unsigned new_size = bc->size * 2;

    // Extend or allocate the block (realloc(0,n) == malloc(n))
    char* new_buff = static_cast<char*>(::realloc(bc->buff, new_size));

    // When realloc fails the old block is still valid, so keep pointing at it
    // rather than overwriting bc->buff with null and losing it.
    if (new_buff == 0)
      return 1;

    // Figure out what to return to the Compressor. If this is the first call,
    // then bc->buff will be null. In this case we want to return the entire
    // buffer because there was no previous allocation. Otherwise we return
    // the address of only the extension, mid-way through the buffer (since
    // its size was doubled), and the sz result is 1/2 the total size.
    if (bc->buff == 0) {
      buff = new_buff;
      sz = new_size;
    } else {
      buff = new_buff + bc->size;
      sz = bc->size;
    }

    // Retain the new base pointer and the size of the allocated block
    bc->buff = new_buff;
    bc->size = new_size;
    return 0;
  }
};
172
// This structure retains the context when sending output to a stream. Its
// callback hands out one heap chunk at a time; each time a new chunk is
// requested the previous (now filled) chunk is written to the stream and
// released. The structure also tracks how many bytes have been written so the
// caller can flush the final, partially filled chunk itself.
struct WriterContext {
  // Smallest chunk the callback will hand out. Users of the callback store a
  // one byte type tag in the first chunk and then expect room for data, so a
  // zero or one byte chunk (which compSize / 2 yields for tiny inputs) is not
  // usable.
  enum { MinChunkSize = 64 };

  // Initialize the context
  WriterContext(std::ostream* OS, unsigned CS)
    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}

  // Make sure we clean up memory (delete [] of a null pointer is a no-op)
  ~WriterContext() {
    delete [] chunk;
  }

  // Write the first `size` bytes of the current chunk to the stream and
  // release the chunk. A size of 0 means "the whole chunk". Does nothing if
  // there is no current chunk.
  void write(unsigned size = 0) {
    if (chunk == 0)
      return;
    unsigned write_size = (size == 0 ? sz : size);
    assert(write_size <= sz && "Writing more than the chunk holds");
    Out->write(chunk, write_size);
    written += write_size;
    delete [] chunk;
    chunk = 0;
    sz = 0;
  }

  // This function is a callback used by the Compressor to allocate memory for
  // its output. This function fulfills that responsibility but also writes
  // the previous (now filled) buffer out to the stream.
  // Returns 0 on success, or 1 (with buffer null and size zero) if no memory
  // could be allocated.
  static unsigned callback(char*& buffer, unsigned& size, void* context) {
    // Cast the context to the structure it must point to.
    WriterContext* ctxt = static_cast<WriterContext*>(context);

    // If there's a previously allocated chunk, it must now be filled with
    // data, so we write it out and deallocate it.
    if (ctxt->chunk != 0 && ctxt->sz > 0) {
      ctxt->write();
    }

    // Defensive: never overwrite (and thereby leak) a chunk that is still
    // held at this point.
    delete [] ctxt->chunk;
    ctxt->chunk = 0;
    ctxt->sz = 0;

    // Compute the size of the next chunk to allocate: half of the estimate
    // given at construction, but never less than MinChunkSize.
    const unsigned min_chunk = MinChunkSize;
    unsigned chunk_size = ctxt->compSize / 2;
    if (chunk_size < min_chunk)
      chunk_size = min_chunk;

    // Allocate the chunk. The nothrow form is used so that failure shows up
    // as a null pointer, which is what the return code below reports; plain
    // new would throw instead and never return null.
    char* block = new (std::nothrow) char[chunk_size];
    if (block == 0) {
      buffer = 0;
      size = 0;
      return 1;
    }

    buffer = ctxt->chunk = block;
    size = ctxt->sz = chunk_size;
    return 0;
  }

  char* chunk;       // pointer to the chunk of memory being filled
  unsigned sz;       // size of chunk
  unsigned written;  // aggregate total of bytes written in all chunks
  unsigned compSize; // size estimate from which the chunk size is derived
  std::ostream* Out; // The stream we write the data to.

private:
  // The context owns `chunk`, so copying it would lead to a double delete.
  WriterContext(const WriterContext&);            // not implemented
  WriterContext& operator=(const WriterContext&); // not implemented
};
235
Reid Spencer460eb632004-10-04 10:49:41 +0000236}
237
238namespace llvm {
239
240// Compress in one of three ways
Reid Spencere3c6ad72004-11-14 22:04:46 +0000241uint64_t Compressor::compress(const char* in, unsigned size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000242 OutputDataCallback* cb, void* context ) {
Reid Spencer460eb632004-10-04 10:49:41 +0000243 assert(in && "Can't compress null buffer");
244 assert(size && "Can't compress empty buffer");
245 assert(cb && "Can't compress without a callback function");
246
247 uint64_t result = 0;
248
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000249 // For small files, we just don't bother compressing. bzip2 isn't very good
250 // with tiny files and can actually make the file larger, so we just avoid
251 // it altogether.
252 if (size > 256) {
253 // Set up the bz_stream
254 bz_stream bzdata;
255 bzdata.bzalloc = 0;
256 bzdata.bzfree = 0;
257 bzdata.opaque = 0;
258 bzdata.next_in = (char*)in;
259 bzdata.avail_in = size;
260 bzdata.next_out = 0;
261 bzdata.avail_out = 0;
262 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
263 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
264 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
265 case BZ_MEM_ERROR: throw std::string("Out of memory");
266 case BZ_OK:
267 default:
268 break;
269 }
Reid Spencer460eb632004-10-04 10:49:41 +0000270
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000271 // Get a block of memory
272 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
273 BZ2_bzCompressEnd(&bzdata);
274 throw std::string("Can't allocate output buffer");
275 }
276
277 // Put compression code in first byte
278 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
279 bzdata.avail_out--;
280
281 // Compress it
282 int bzerr = BZ_FINISH_OK;
283 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000284 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000285 BZ2_bzCompressEnd(&bzdata);
286 throw std::string("Can't allocate output buffer");
287 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000288 }
289 switch (bzerr) {
290 case BZ_SEQUENCE_ERROR:
291 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
292 case BZ_FINISH_OK:
293 case BZ_STREAM_END: break;
294 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
Reid Spencer460eb632004-10-04 10:49:41 +0000295 }
296
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000297 // Finish
298 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
299 bzdata.total_out_lo32 + 1;
Reid Spencer460eb632004-10-04 10:49:41 +0000300
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000301 BZ2_bzCompressEnd(&bzdata);
302 } else {
303 // Do null compression, for small files
304 NULLCOMP_stream sdata;
305 sdata.next_in = (char*)in;
306 sdata.avail_in = size;
307 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000308
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000309 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
310 throw std::string("Can't allocate output buffer");
Reid Spencer460eb632004-10-04 10:49:41 +0000311 }
312
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000313 *(sdata.next_out++) = COMP_TYPE_NONE;
314 sdata.avail_out--;
Reid Spencer460eb632004-10-04 10:49:41 +0000315
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000316 while (!NULLCOMP_compress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000317 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000318 throw std::string("Can't allocate output buffer");
319 }
Reid Spencer460eb632004-10-04 10:49:41 +0000320 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000321
322 result = sdata.output_count + 1;
323 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000324 }
325 return result;
326}
327
Reid Spencere3c6ad72004-11-14 22:04:46 +0000328uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000329Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000330 BufferContext bc(size);
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000331 unsigned result = compress(in,size,BufferContext::callback,(void*)&bc);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000332 out = bc.buff;
333 return result;
334}
335
336uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000337Compressor::compressToStream(const char*in, unsigned size, std::ostream& out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000338 // Set up the context and writer
339 WriterContext ctxt(&out,size / 2);
340
341 // Compress everything after the magic number (which we'll alter)
342 uint64_t zipSize = Compressor::compress(in,size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000343 WriterContext::callback, (void*)&ctxt);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000344
345 if (ctxt.chunk) {
346 ctxt.write(zipSize - ctxt.written);
347 }
348 return zipSize;
349}
350
Reid Spencer460eb632004-10-04 10:49:41 +0000351// Decompress in one of three ways
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000352uint64_t Compressor::decompress(const char *in, unsigned size,
Reid Spencer469c34b2004-10-04 17:29:25 +0000353 OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +0000354 assert(in && "Can't decompress null buffer");
355 assert(size > 1 && "Can't decompress empty buffer");
356 assert(cb && "Can't decompress without a callback function");
357
358 uint64_t result = 0;
359
360 switch (*in++) {
361 case COMP_TYPE_BZIP2: {
Reid Spencer460eb632004-10-04 10:49:41 +0000362 // Set up the bz_stream
363 bz_stream bzdata;
364 bzdata.bzalloc = 0;
365 bzdata.bzfree = 0;
366 bzdata.opaque = 0;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000367 bzdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000368 bzdata.avail_in = size - 1;
369 bzdata.next_out = 0;
370 bzdata.avail_out = 0;
371 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
372 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
373 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
374 case BZ_MEM_ERROR: throw std::string("Out of memory");
375 case BZ_OK:
376 default:
377 break;
378 }
379
380 // Get a block of memory
Reid Spencer469c34b2004-10-04 17:29:25 +0000381 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000382 BZ2_bzDecompressEnd(&bzdata);
383 throw std::string("Can't allocate output buffer");
384 }
385
386 // Decompress it
387 int bzerr = BZ_OK;
388 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000389 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000390 BZ2_bzDecompressEnd(&bzdata);
391 throw std::string("Can't allocate output buffer");
392 }
393 }
394
395 switch (bzerr) {
396 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
397 case BZ_MEM_ERROR: throw std::string("Out of memory");
398 case BZ_DATA_ERROR: throw std::string("Data integrity error");
399 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
400 default: throw("Ooops");
401 case BZ_STREAM_END:
402 break;
403 }
404
405 // Finish
406 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
407 bzdata.total_out_lo32;
408 BZ2_bzDecompressEnd(&bzdata);
409 break;
Chris Lattnerebe989c2004-10-04 16:33:25 +0000410 }
Reid Spencer460eb632004-10-04 10:49:41 +0000411
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000412 case COMP_TYPE_NONE: {
Reid Spencer047c0092004-10-04 17:45:44 +0000413 NULLCOMP_stream sdata;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000414 sdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000415 sdata.avail_in = size - 1;
Reid Spencer047c0092004-10-04 17:45:44 +0000416 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000417
Reid Spencer469c34b2004-10-04 17:29:25 +0000418 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000419 throw std::string("Can't allocate output buffer");
420 }
421
Reid Spencer047c0092004-10-04 17:45:44 +0000422 while (!NULLCOMP_decompress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000423 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000424 throw std::string("Can't allocate output buffer");
425 }
426 }
427
428 result = sdata.output_count;
Reid Spencer047c0092004-10-04 17:45:44 +0000429 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000430 break;
431 }
432
433 default:
434 throw std::string("Unknown type of compressed data");
435 }
436
437 return result;
438}
439
Reid Spencere3c6ad72004-11-14 22:04:46 +0000440uint64_t
441Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
442 BufferContext bc(size);
443 unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
444 out = bc.buff;
445 return result;
446}
447
448uint64_t
449Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
450 // Set up the context and writer
451 WriterContext ctxt(&out,size / 2);
452
453 // Compress everything after the magic number (which we'll alter)
454 uint64_t zipSize = Compressor::decompress(in,size,
455 WriterContext::callback, (void*)&ctxt);
456
457 if (ctxt.chunk) {
458 ctxt.write(zipSize - ctxt.written);
459 }
460 return zipSize;
461}
462
Reid Spencer460eb632004-10-04 10:49:41 +0000463}
464
465// vim: sw=2 ai