blob: 167b26b53f4ae3f297adf6f28e18a979e9867b40 [file] [log] [blame]
Reid Spencer460eb632004-10-04 10:49:41 +00001//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the llvm::Compressor class, an abstraction for memory
11// block compression.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <cstdlib>
#include <new>
#include <string>
#include "bzip2/bzlib.h"
Chris Lattner52b87522005-01-29 16:53:02 +000021using namespace llvm;
Reid Spencer460eb632004-10-04 10:49:41 +000022
/// Tag stored in the first byte of every compressed block. It records which
/// method produced the bytes that follow so that decompression can dispatch on
/// it.
enum CompressionTypes {
  COMP_TYPE_NONE  = '0',  ///< Payload is an uncompressed copy of the input
  COMP_TYPE_BZIP2 = '2'   ///< Payload is a bzip2 stream
};
27
Chris Lattner52b87522005-01-29 16:53:02 +000028static int getdata(char*& buffer, unsigned& size,
Reid Spencer469c34b2004-10-04 17:29:25 +000029 llvm::Compressor::OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +000030 buffer = 0;
31 size = 0;
Reid Spencer469c34b2004-10-04 17:29:25 +000032 int result = (*cb)(buffer, size, context);
Reid Spencer460eb632004-10-04 10:49:41 +000033 assert(buffer != 0 && "Invalid result from Compressor callback");
34 assert(size != 0 && "Invalid result from Compressor callback");
35 return result;
36}
37
38//===----------------------------------------------------------------------===//
Reid Spencer047c0092004-10-04 17:45:44 +000039//=== NULLCOMP - a compression like set of routines that just copies data
40//=== without doing any compression. This is provided so that if the
41//=== configured environment doesn't have a compression library the
42//=== program can still work, albeit using more data/memory.
Reid Spencer460eb632004-10-04 10:49:41 +000043//===----------------------------------------------------------------------===//
44
/// Stream state for the "null" compressor. The field names follow the ones
/// this file uses on bz_stream so both code paths read the same way.
struct NULLCOMP_stream {
  // User provided fields
  char* next_in;          // Next input byte to consume
  unsigned avail_in;      // Number of input bytes remaining at next_in
  char* next_out;         // Where the next output byte is stored
  unsigned avail_out;     // Number of bytes of space remaining at next_out

  // Information fields
  uint64_t output_count;  // Total count of output bytes
};

/// Prepare a stream for use. Only the output counter is reset; the caller is
/// responsible for the four user provided fields.
static void NULLCOMP_init(NULLCOMP_stream* s) {
  s->output_count = 0;
}

/// Copy as many bytes as currently fit from the input to the output and move
/// both cursors (pointer and remaining count) past the copied bytes.
///
/// @returns true if all remaining input was consumed, false if the output
///          filled up first and the caller must supply more space.
static bool NULLCOMP_copy(NULLCOMP_stream* s) {
  assert(s && "Invalid NULLCOMP_stream");
  assert(s->next_in != 0);
  assert(s->next_out != 0);
  assert(s->avail_in >= 1);
  assert(s->avail_out >= 1);

  const bool everything_fits = s->avail_out >= s->avail_in;
  const unsigned count = everything_fits ? s->avail_in : s->avail_out;

  ::memcpy(s->next_out, s->next_in, count);
  s->output_count += count;

  s->next_in += count;
  s->avail_in -= count;

  // Keep the output cursor in step with avail_out so that the stream stays
  // consistent if it is used again without replacing the output buffer.
  s->next_out += count;
  s->avail_out -= count;

  return everything_fits;
}

/// "Compress" by copying input to output unchanged.
/// @returns true once all input has been consumed, false if more output space
///          is needed.
static bool NULLCOMP_compress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// "Decompress" by copying input to output unchanged.
/// @returns true once all input has been consumed, false if more output space
///          is needed.
static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// Counterpart of NULLCOMP_init. The stream owns nothing, so this does nothing.
static void NULLCOMP_end(NULLCOMP_stream* /*strm*/) {
}
110
Chris Lattner52b87522005-01-29 16:53:02 +0000111namespace {
112
/// Growable malloc'ed output buffer for Compressor::compress/decompress. The
/// Compressor asks for output memory through a callback, and the buffer may
/// have to be reallocated while data is still being produced, so this
/// structure retains the block and its size across calls to the callback.
/// @brief An internal buffer object used for handling (de)compression
struct BufferContext {
  char* buff;     ///< Base of the block; null until the first callback
  unsigned size;  ///< Size of the block; doubled by every callback

  /// @param compressedSize size of the input buffer, used as the sizing hint
  explicit BufferContext(unsigned compressedSize) {
    // Null to indicate malloc of a new block
    buff = 0;

    // Compute the initial length of the uncompression buffer. Note that this
    // is twice the length of the compressed buffer and will be doubled again
    // in the callback for an initial allocation of 4x compressedSize. This
    // calculation is based on the typical compression ratio of bzip2 on LLVM
    // bytecode files which typically ranges in the 50%-75% range. Since we
    // typically get at least 50%, doubling is insufficient. By using a 4x
    // multiplier on the first allocation, we minimize the impact of having to
    // copy the buffer on reallocation.
    size = compressedSize*2;
  }

  /// trimTo - Reduce the size of the buffer down to the specified amount. This
  /// is useful after have read in the bytecode file to discard extra unused
  /// memory. If the reallocation fails the existing (larger) block is kept, so
  /// buff never ends up null while data is still held.
  ///
  void trimTo(size_t NewSize) {
    if (NewSize == 0) {
      // realloc(p, 0) is not portable; release the block explicitly instead.
      ::free(buff);
      buff = 0;
      size = 0;
      return;
    }

    char* trimmed = (char*)::realloc(buff, NewSize);
    if (trimmed == 0)
      return;   // Original block is untouched and still valid.

    buff = trimmed;
    size = static_cast<unsigned>(NewSize);
  }

  /// This function handles allocation of the buffer used as the output of
  /// Compressor::compress and Compressor::decompress. Each call doubles the
  /// block.
  ///
  /// @param buff set to the memory the Compressor may write to next (null on
  ///             failure)
  /// @param sz   set to the number of bytes available at \p buff (zero on
  ///             failure)
  /// @param ctxt the BufferContext to operate on
  /// @returns 0 on success, 1 if no memory could be provided. On failure the
  ///          BufferContext is left unchanged.
  static unsigned callback(char*&buff, unsigned& sz, void* ctxt){
    // Cast the context variable to our BufferContext
    BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);

    const unsigned old_size = bc->size;

    // Refuse to "double" a size that is zero or whose double does not fit in
    // an unsigned. A wrapped size would shrink the block while the extension
    // handed back below would still be computed from the old, larger size.
    if (old_size == 0 || old_size > ~0U / 2) {
      buff = 0;
      sz = 0;
      return 1;
    }

    // Compute the new, doubled, size of the block
    const unsigned new_size = old_size * 2;

    // Extend or allocate the block (realloc(0,n) == malloc(n)). On failure
    // realloc leaves the old block alone, so bc must keep pointing at it.
    char* new_buff = (char*) ::realloc(bc->buff, new_size);
    if (new_buff == 0) {
      buff = 0;
      sz = 0;
      return 1;
    }

    // Figure out what to return to the Compressor. If this is the first call,
    // then bc->buff will be null. In this case we want to return the entire
    // buffer because there was no previous allocation. Otherwise we return the
    // address of only the extension, mid-way through the buffer (since its
    // size was doubled), and sz is 1/2 the total size of the buffer.
    if (bc->buff == 0) {
      buff = new_buff;
      sz = new_size;
    } else {
      buff = new_buff + old_size;
      sz = old_size;
    }

    // Retain the new base pointer and the size of the allocated block
    bc->buff = new_buff;
    bc->size = new_size;
    return 0;
  }
};
182
Chris Lattner52b87522005-01-29 16:53:02 +0000183} // end anonymous namespace
184
185
186namespace {
187
// This structure retains the context when (de)compressing to a stream. Its
// callback hands the Compressor one chunk of memory at a time; the previously
// filled chunk is written to the stream when the next one is requested. It
// keeps track of the current chunk and of how many bytes have been written.
struct WriterContext {
  // Initialize the context. OS is the stream to write to and CS is the size
  // hint from which the chunk size is derived.
  WriterContext(std::ostream*OS, unsigned CS)
    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}

  // Make sure we clean up memory
  ~WriterContext() {
    delete [] chunk;
  }

  // Write the first `size` bytes of the current chunk to the stream and
  // release the chunk. A size of 0 means "the whole chunk".
  void write(unsigned size = 0) {
    unsigned write_size = (size == 0 ? sz : size);
    Out->write(chunk,write_size);
    written += write_size;
    delete [] chunk;
    chunk = 0;
    sz = 0;
  }

  // This function is a callback used by Compressor::compress and
  // Compressor::decompress to allocate memory for the output buffer. This
  // function fulfills that responsibility but also writes the previous (now
  // filled) buffer out to the stream.
  //
  // Returns 0 on success. Returns 1 with buffer == 0 and size == 0 if no memory
  // could be allocated.
  static unsigned callback(char*& buffer, unsigned& size, void* context) {
    // Smallest chunk ever handed out. Compressor::compress stores a type byte
    // in the first chunk before any data, so a chunk of 0 or 1 bytes (which
    // compSize / 2 yields for tiny hints) would leave no room for data.
    const unsigned MinChunk = 64;

    // Cast the context to the structure it must point to.
    WriterContext* ctxt =
      reinterpret_cast<WriterContext*>(context);

    // If there's a previously allocated chunk, it must now be filled with
    // output data, so we write it out and deallocate it.
    if (ctxt->chunk != 0) {
      ctxt->write();
    }

    // Compute the size of the next chunk to allocate: half of the size hint
    // given to the constructor, but never less than MinChunk.
    unsigned wanted = ctxt->compSize / 2;
    if (wanted < MinChunk)
      wanted = MinChunk;

    // Allocate the chunk. Plain new would throw instead of returning null,
    // which would make the failure result below unreachable.
    char* fresh = new (std::nothrow) char [wanted];
    if (fresh == 0) {
      buffer = 0;
      size = 0;
      return 1;
    }

    buffer = ctxt->chunk = fresh;
    size = ctxt->sz = wanted;
    return 0;
  }

  char* chunk;       // pointer to the chunk of memory currently being filled
  unsigned sz;       // size of chunk
  unsigned written;  // aggregate total of bytes written in all chunks
  unsigned compSize; // size hint given at construction; chunks are half of it
  std::ostream* Out; // The stream we write the data to.

private:
  // The context owns chunk, so copying it would free the chunk twice.
  WriterContext(const WriterContext&);
  WriterContext& operator=(const WriterContext&);
};
250
Chris Lattner52b87522005-01-29 16:53:02 +0000251} // end anonymous namespace
Reid Spencer460eb632004-10-04 10:49:41 +0000252
253// Compress in one of three ways
Reid Spencere3c6ad72004-11-14 22:04:46 +0000254uint64_t Compressor::compress(const char* in, unsigned size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000255 OutputDataCallback* cb, void* context ) {
Reid Spencer460eb632004-10-04 10:49:41 +0000256 assert(in && "Can't compress null buffer");
257 assert(size && "Can't compress empty buffer");
258 assert(cb && "Can't compress without a callback function");
259
260 uint64_t result = 0;
261
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000262 // For small files, we just don't bother compressing. bzip2 isn't very good
263 // with tiny files and can actually make the file larger, so we just avoid
264 // it altogether.
Reid Spencera7a5cc82004-11-30 07:13:34 +0000265 if (size > 64*1024) {
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000266 // Set up the bz_stream
267 bz_stream bzdata;
268 bzdata.bzalloc = 0;
269 bzdata.bzfree = 0;
270 bzdata.opaque = 0;
271 bzdata.next_in = (char*)in;
272 bzdata.avail_in = size;
273 bzdata.next_out = 0;
274 bzdata.avail_out = 0;
275 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
276 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
277 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
278 case BZ_MEM_ERROR: throw std::string("Out of memory");
279 case BZ_OK:
280 default:
281 break;
282 }
Reid Spencer460eb632004-10-04 10:49:41 +0000283
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000284 // Get a block of memory
285 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
286 BZ2_bzCompressEnd(&bzdata);
287 throw std::string("Can't allocate output buffer");
288 }
289
290 // Put compression code in first byte
291 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
292 bzdata.avail_out--;
293
294 // Compress it
295 int bzerr = BZ_FINISH_OK;
296 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000297 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000298 BZ2_bzCompressEnd(&bzdata);
299 throw std::string("Can't allocate output buffer");
300 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000301 }
302 switch (bzerr) {
303 case BZ_SEQUENCE_ERROR:
304 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
305 case BZ_FINISH_OK:
306 case BZ_STREAM_END: break;
307 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
Reid Spencer460eb632004-10-04 10:49:41 +0000308 }
309
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000310 // Finish
311 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
312 bzdata.total_out_lo32 + 1;
Reid Spencer460eb632004-10-04 10:49:41 +0000313
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000314 BZ2_bzCompressEnd(&bzdata);
315 } else {
316 // Do null compression, for small files
317 NULLCOMP_stream sdata;
318 sdata.next_in = (char*)in;
319 sdata.avail_in = size;
320 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000321
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000322 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
323 throw std::string("Can't allocate output buffer");
Reid Spencer460eb632004-10-04 10:49:41 +0000324 }
325
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000326 *(sdata.next_out++) = COMP_TYPE_NONE;
327 sdata.avail_out--;
Reid Spencer460eb632004-10-04 10:49:41 +0000328
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000329 while (!NULLCOMP_compress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000330 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000331 throw std::string("Can't allocate output buffer");
332 }
Reid Spencer460eb632004-10-04 10:49:41 +0000333 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000334
335 result = sdata.output_count + 1;
336 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000337 }
338 return result;
339}
340
Reid Spencere3c6ad72004-11-14 22:04:46 +0000341uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000342Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000343 BufferContext bc(size);
Chris Lattner25dc8912005-01-08 19:32:59 +0000344 uint64_t result = compress(in,size,BufferContext::callback,(void*)&bc);
Chris Lattner8c2cb422005-01-29 17:05:56 +0000345 bc.trimTo(result);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000346 out = bc.buff;
347 return result;
348}
349
350uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000351Compressor::compressToStream(const char*in, unsigned size, std::ostream& out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000352 // Set up the context and writer
353 WriterContext ctxt(&out,size / 2);
354
355 // Compress everything after the magic number (which we'll alter)
356 uint64_t zipSize = Compressor::compress(in,size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000357 WriterContext::callback, (void*)&ctxt);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000358
359 if (ctxt.chunk) {
360 ctxt.write(zipSize - ctxt.written);
361 }
362 return zipSize;
363}
364
Reid Spencer460eb632004-10-04 10:49:41 +0000365// Decompress in one of three ways
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000366uint64_t Compressor::decompress(const char *in, unsigned size,
Reid Spencer469c34b2004-10-04 17:29:25 +0000367 OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +0000368 assert(in && "Can't decompress null buffer");
369 assert(size > 1 && "Can't decompress empty buffer");
370 assert(cb && "Can't decompress without a callback function");
371
372 uint64_t result = 0;
373
374 switch (*in++) {
375 case COMP_TYPE_BZIP2: {
Reid Spencer460eb632004-10-04 10:49:41 +0000376 // Set up the bz_stream
377 bz_stream bzdata;
378 bzdata.bzalloc = 0;
379 bzdata.bzfree = 0;
380 bzdata.opaque = 0;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000381 bzdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000382 bzdata.avail_in = size - 1;
383 bzdata.next_out = 0;
384 bzdata.avail_out = 0;
385 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
386 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
387 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
388 case BZ_MEM_ERROR: throw std::string("Out of memory");
389 case BZ_OK:
390 default:
391 break;
392 }
393
394 // Get a block of memory
Reid Spencer469c34b2004-10-04 17:29:25 +0000395 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000396 BZ2_bzDecompressEnd(&bzdata);
397 throw std::string("Can't allocate output buffer");
398 }
399
400 // Decompress it
401 int bzerr = BZ_OK;
402 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000403 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000404 BZ2_bzDecompressEnd(&bzdata);
405 throw std::string("Can't allocate output buffer");
406 }
407 }
408
409 switch (bzerr) {
410 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
411 case BZ_MEM_ERROR: throw std::string("Out of memory");
412 case BZ_DATA_ERROR: throw std::string("Data integrity error");
413 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
414 default: throw("Ooops");
415 case BZ_STREAM_END:
416 break;
417 }
418
419 // Finish
420 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
421 bzdata.total_out_lo32;
422 BZ2_bzDecompressEnd(&bzdata);
423 break;
Chris Lattnerebe989c2004-10-04 16:33:25 +0000424 }
Reid Spencer460eb632004-10-04 10:49:41 +0000425
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000426 case COMP_TYPE_NONE: {
Reid Spencer047c0092004-10-04 17:45:44 +0000427 NULLCOMP_stream sdata;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000428 sdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000429 sdata.avail_in = size - 1;
Reid Spencer047c0092004-10-04 17:45:44 +0000430 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000431
Reid Spencer469c34b2004-10-04 17:29:25 +0000432 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000433 throw std::string("Can't allocate output buffer");
434 }
435
Reid Spencer047c0092004-10-04 17:45:44 +0000436 while (!NULLCOMP_decompress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000437 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000438 throw std::string("Can't allocate output buffer");
439 }
440 }
441
442 result = sdata.output_count;
Reid Spencer047c0092004-10-04 17:45:44 +0000443 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000444 break;
445 }
446
447 default:
448 throw std::string("Unknown type of compressed data");
449 }
450
451 return result;
452}
453
Reid Spencere3c6ad72004-11-14 22:04:46 +0000454uint64_t
455Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
456 BufferContext bc(size);
457 unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
458 out = bc.buff;
459 return result;
460}
461
462uint64_t
463Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
464 // Set up the context and writer
465 WriterContext ctxt(&out,size / 2);
466
467 // Compress everything after the magic number (which we'll alter)
468 uint64_t zipSize = Compressor::decompress(in,size,
469 WriterContext::callback, (void*)&ctxt);
470
471 if (ctxt.chunk) {
472 ctxt.write(zipSize - ctxt.written);
473 }
474 return zipSize;
475}
476
Reid Spencer460eb632004-10-04 10:49:41 +0000477// vim: sw=2 ai