blob: 43e85495a1b2b3453301d81211243239fa73bf1e [file] [log] [blame]
//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the llvm::Compressor class, an abstraction for memory
// block compression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include "bzip2/bzlib.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <new>
#include <ostream>
#include <string>
using namespace llvm;

/// One-byte tags stored as the first byte of every block produced by
/// Compressor::compress. Compressor::decompress reads the tag to decide how
/// the remainder of the block is encoded.
enum CompressionTypes {
  COMP_TYPE_NONE  = '0',  ///< Payload was copied verbatim (NULLCOMP).
  COMP_TYPE_BZIP2 = '2'   ///< Payload is a bzip2 stream.
};

Chris Lattner52b87522005-01-29 16:53:02 +000028static int getdata(char*& buffer, unsigned& size,
Reid Spencer469c34b2004-10-04 17:29:25 +000029 llvm::Compressor::OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +000030 buffer = 0;
31 size = 0;
Reid Spencer469c34b2004-10-04 17:29:25 +000032 int result = (*cb)(buffer, size, context);
Reid Spencer460eb632004-10-04 10:49:41 +000033 assert(buffer != 0 && "Invalid result from Compressor callback");
34 assert(size != 0 && "Invalid result from Compressor callback");
35 return result;
36}
37
38//===----------------------------------------------------------------------===//
Reid Spencer047c0092004-10-04 17:45:44 +000039//=== NULLCOMP - a compression like set of routines that just copies data
40//=== without doing any compression. This is provided so that if the
41//=== configured environment doesn't have a compression library the
42//=== program can still work, albeit using more data/memory.
Reid Spencer460eb632004-10-04 10:49:41 +000043//===----------------------------------------------------------------------===//
44
Reid Spencer047c0092004-10-04 17:45:44 +000045struct NULLCOMP_stream {
Reid Spencer460eb632004-10-04 10:49:41 +000046 // User provided fields
47 char* next_in;
48 unsigned avail_in;
49 char* next_out;
50 unsigned avail_out;
51
52 // Information fields
53 uint64_t output_count; // Total count of output bytes
Reid Spencer460eb632004-10-04 10:49:41 +000054};
55
Chris Lattner52b87522005-01-29 16:53:02 +000056static void NULLCOMP_init(NULLCOMP_stream* s) {
Reid Spencer460eb632004-10-04 10:49:41 +000057 s->output_count = 0;
Reid Spencer460eb632004-10-04 10:49:41 +000058}
59
Chris Lattner52b87522005-01-29 16:53:02 +000060static bool NULLCOMP_compress(NULLCOMP_stream* s) {
Reid Spencer047c0092004-10-04 17:45:44 +000061 assert(s && "Invalid NULLCOMP_stream");
Reid Spencer460eb632004-10-04 10:49:41 +000062 assert(s->next_in != 0);
63 assert(s->next_out != 0);
64 assert(s->avail_in >= 1);
65 assert(s->avail_out >= 1);
66
Reid Spencer460eb632004-10-04 10:49:41 +000067 if (s->avail_out >= s->avail_in) {
68 ::memcpy(s->next_out, s->next_in, s->avail_in);
69 s->output_count += s->avail_in;
70 s->avail_out -= s->avail_in;
71 s->next_in += s->avail_in;
72 s->avail_in = 0;
73 return true;
74 } else {
75 ::memcpy(s->next_out, s->next_in, s->avail_out);
76 s->output_count += s->avail_out;
77 s->avail_in -= s->avail_out;
78 s->next_in += s->avail_out;
79 s->avail_out = 0;
80 return false;
81 }
82}
83
Chris Lattner52b87522005-01-29 16:53:02 +000084static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
Reid Spencer047c0092004-10-04 17:45:44 +000085 assert(s && "Invalid NULLCOMP_stream");
Reid Spencer460eb632004-10-04 10:49:41 +000086 assert(s->next_in != 0);
87 assert(s->next_out != 0);
88 assert(s->avail_in >= 1);
89 assert(s->avail_out >= 1);
90
Reid Spencer460eb632004-10-04 10:49:41 +000091 if (s->avail_out >= s->avail_in) {
92 ::memcpy(s->next_out, s->next_in, s->avail_in);
93 s->output_count += s->avail_in;
94 s->avail_out -= s->avail_in;
95 s->next_in += s->avail_in;
96 s->avail_in = 0;
97 return true;
98 } else {
99 ::memcpy(s->next_out, s->next_in, s->avail_out);
100 s->output_count += s->avail_out;
101 s->avail_in -= s->avail_out;
102 s->next_in += s->avail_out;
103 s->avail_out = 0;
104 return false;
105 }
106}
107
Chris Lattner52b87522005-01-29 16:53:02 +0000108static void NULLCOMP_end(NULLCOMP_stream* strm) {
Reid Spencer460eb632004-10-04 10:49:41 +0000109}
110
namespace {

/// This structure retains the state of a single growable output buffer across
/// calls to its callback. Each call doubles the buffer with realloc and hands
/// the newly added space back to the Compressor. The block is allocated with
/// realloc, so whoever ends up owning \c buff must release it with free().
/// @brief An internal buffer object used for handling (de)compression
struct BufferContext {
  char* buff;     ///< Base of the allocated block; null before the first call.
  unsigned size;  ///< Size the next call will double (allocated size so far).

  explicit BufferContext(unsigned compressedSize) : buff(0), size(0) {
    // Compute the initial length of the uncompression buffer. Note that this
    // is twice the length of the compressed buffer and will be doubled again
    // in the callback for an initial allocation of 4x compressedSize. This
    // calculation is based on the typical compression ratio of bzip2 on LLVM
    // bytecode files which typically ranges in the 50%-75% range. Since we
    // typically get at least 50%, doubling is insufficient. By using a 4x
    // multiplier on the first allocation, we minimize the impact of having to
    // copy the buffer on reallocation.
    //
    // Very large inputs are clamped so that neither this doubling nor the
    // first doubling in the callback can wrap around.
    const unsigned MaxSize = ~0U;
    size = (compressedSize <= MaxSize / 4) ? compressedSize * 2 : MaxSize / 2;
  }

  /// Allocate or extend the buffer and report the fresh space.
  ///
  /// @param buff Receives the start of the space the caller may fill: the
  ///             whole block on the first call, only the extension afterwards.
  /// @param sz   Receives the number of bytes available at \p buff.
  /// @param ctxt The BufferContext to operate on.
  /// @returns 0 on success, 1 if no memory could be obtained. On failure the
  ///          existing block is released and \c buff is reset to null.
  static unsigned callback(char*& buff, unsigned& sz, void* ctxt) {
    // Cast the context variable to our BufferContext
    BufferContext* bc = static_cast<BufferContext*>(ctxt);

    const unsigned MaxSize = ~0U;
    const unsigned MinSize = 16;  // never hand out a zero-length buffer
    const unsigned old_size = bc->size;

    // Compute the new, doubled, size of the block and extend or allocate it
    // (realloc(0,n) == malloc(n)). Refuse if doubling would overflow.
    char* new_buff = 0;
    unsigned new_size = 0;
    if (old_size <= MaxSize / 2) {
      new_size = (old_size == 0) ? MinSize : old_size * 2;
      new_buff = static_cast<char*>(::realloc(bc->buff, new_size));
    }

    // On failure realloc leaves the old block allocated. Nobody can use it
    // once we report an error, so release it here rather than leak it.
    if (new_buff == 0) {
      ::free(bc->buff);
      bc->buff = 0;
      buff = 0;
      sz = 0;
      return 1;
    }

    // Figure out what to return to the Compressor. If this is the first call,
    // then bc->buff is null and we return the entire buffer because there was
    // no previous allocation. Otherwise the data written so far occupies the
    // first old_size bytes, so we return only the extension that follows it.
    if (bc->buff == 0) {
      buff = new_buff;
      sz = new_size;
    } else {
      buff = new_buff + old_size;
      sz = new_size - old_size;
    }

    // Retain the base pointer and size of the allocated block
    bc->buff = new_buff;
    bc->size = new_size;
    return 0;
  }
};

} // end anonymous namespace


namespace {

// This structure retains the context when writing (de)compressed data to a
// stream. Its callback hands out one heap-allocated chunk at a time; each time
// a new chunk is requested the previous (now filled) one is written to the
// stream and released. It also keeps track of how many bytes have been
// written so far.
struct WriterContext {
  // Initialize the context
  WriterContext(std::ostream* OS, unsigned CS)
    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}

  // Make sure we clean up memory
  ~WriterContext() {
    delete [] chunk;
  }

  // Write the current chunk to the stream and release it. A size of 0 means
  // "the whole chunk"; any other value writes only that many leading bytes.
  void write(unsigned size = 0) {
    if (chunk == 0)
      return;
    const unsigned write_size = (size == 0 ? sz : size);
    Out->write(chunk, write_size);
    written += write_size;
    delete [] chunk;
    chunk = 0;
    sz = 0;
  }

  // This function is a callback used by the Compressor to obtain memory for
  // its output. It fulfills that responsibility but also writes the previous
  // (now filled) buffer out to the stream. Returns 0 on success and 1 if the
  // allocation failed, in which case buffer is null and size is 0.
  static unsigned callback(char*& buffer, unsigned& size, void* context) {
    // Cast the context to the structure it must point to.
    WriterContext* ctxt = static_cast<WriterContext*>(context);

    // If there's a previously allocated chunk, it must now be filled with
    // data, so we write it out and deallocate it.
    if (ctxt->chunk != 0) {
      if (ctxt->sz > 0) {
        ctxt->write();
      } else {
        delete [] ctxt->chunk;
        ctxt->chunk = 0;
      }
    }

    // Compute the size of the next chunk to allocate: half of compSize, but
    // never less than a small minimum. Without the floor a tiny compSize
    // yields a zero-length chunk, which the Compressor cannot make progress
    // with (it needs room for at least the type byte plus one data byte).
    const unsigned MinChunkSize = 1024;
    unsigned wanted = ctxt->compSize / 2;
    if (wanted < MinChunkSize)
      wanted = MinChunkSize;

    // Allocate the chunk. Use the non-throwing form so that an allocation
    // failure is reported through the return value, as the callback contract
    // expects, rather than as a std::bad_alloc the callers do not handle.
    char* fresh = new (std::nothrow) char[wanted];
    buffer = ctxt->chunk = fresh;
    size = ctxt->sz = (fresh != 0 ? wanted : 0);

    // We must return 1 if the allocation failed so that the Compressor knows
    // not to use the buffer pointer.
    return (fresh == 0 ? 1 : 0);
  }

  char* chunk;       // pointer to the chunk of memory being filled
  unsigned sz;       // size of chunk
  unsigned written;  // aggregate total of bytes written in all chunks
  unsigned compSize; // size estimate supplied by the creator; chunks are
                     // sized at half of this
  std::ostream* Out; // The stream we write the data to.

private:
  // The context owns `chunk`; copying would lead to a double delete.
  WriterContext(const WriterContext&);
  WriterContext& operator=(const WriterContext&);
};

} // end anonymous namespace

244// Compress in one of three ways
Reid Spencere3c6ad72004-11-14 22:04:46 +0000245uint64_t Compressor::compress(const char* in, unsigned size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000246 OutputDataCallback* cb, void* context ) {
Reid Spencer460eb632004-10-04 10:49:41 +0000247 assert(in && "Can't compress null buffer");
248 assert(size && "Can't compress empty buffer");
249 assert(cb && "Can't compress without a callback function");
250
251 uint64_t result = 0;
252
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000253 // For small files, we just don't bother compressing. bzip2 isn't very good
254 // with tiny files and can actually make the file larger, so we just avoid
255 // it altogether.
Reid Spencera7a5cc82004-11-30 07:13:34 +0000256 if (size > 64*1024) {
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000257 // Set up the bz_stream
258 bz_stream bzdata;
259 bzdata.bzalloc = 0;
260 bzdata.bzfree = 0;
261 bzdata.opaque = 0;
262 bzdata.next_in = (char*)in;
263 bzdata.avail_in = size;
264 bzdata.next_out = 0;
265 bzdata.avail_out = 0;
266 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
267 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
268 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
269 case BZ_MEM_ERROR: throw std::string("Out of memory");
270 case BZ_OK:
271 default:
272 break;
273 }
Reid Spencer460eb632004-10-04 10:49:41 +0000274
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000275 // Get a block of memory
276 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
277 BZ2_bzCompressEnd(&bzdata);
278 throw std::string("Can't allocate output buffer");
279 }
280
281 // Put compression code in first byte
282 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
283 bzdata.avail_out--;
284
285 // Compress it
286 int bzerr = BZ_FINISH_OK;
287 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000288 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000289 BZ2_bzCompressEnd(&bzdata);
290 throw std::string("Can't allocate output buffer");
291 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000292 }
293 switch (bzerr) {
294 case BZ_SEQUENCE_ERROR:
295 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
296 case BZ_FINISH_OK:
297 case BZ_STREAM_END: break;
298 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
Reid Spencer460eb632004-10-04 10:49:41 +0000299 }
300
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000301 // Finish
302 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
303 bzdata.total_out_lo32 + 1;
Reid Spencer460eb632004-10-04 10:49:41 +0000304
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000305 BZ2_bzCompressEnd(&bzdata);
306 } else {
307 // Do null compression, for small files
308 NULLCOMP_stream sdata;
309 sdata.next_in = (char*)in;
310 sdata.avail_in = size;
311 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000312
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000313 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
314 throw std::string("Can't allocate output buffer");
Reid Spencer460eb632004-10-04 10:49:41 +0000315 }
316
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000317 *(sdata.next_out++) = COMP_TYPE_NONE;
318 sdata.avail_out--;
Reid Spencer460eb632004-10-04 10:49:41 +0000319
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000320 while (!NULLCOMP_compress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000321 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000322 throw std::string("Can't allocate output buffer");
323 }
Reid Spencer460eb632004-10-04 10:49:41 +0000324 }
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000325
326 result = sdata.output_count + 1;
327 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000328 }
329 return result;
330}
331
Reid Spencere3c6ad72004-11-14 22:04:46 +0000332uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000333Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000334 BufferContext bc(size);
Chris Lattner25dc8912005-01-08 19:32:59 +0000335 uint64_t result = compress(in,size,BufferContext::callback,(void*)&bc);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000336 out = bc.buff;
337 return result;
338}
339
340uint64_t
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000341Compressor::compressToStream(const char*in, unsigned size, std::ostream& out) {
Reid Spencere3c6ad72004-11-14 22:04:46 +0000342 // Set up the context and writer
343 WriterContext ctxt(&out,size / 2);
344
345 // Compress everything after the magic number (which we'll alter)
346 uint64_t zipSize = Compressor::compress(in,size,
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000347 WriterContext::callback, (void*)&ctxt);
Reid Spencere3c6ad72004-11-14 22:04:46 +0000348
349 if (ctxt.chunk) {
350 ctxt.write(zipSize - ctxt.written);
351 }
352 return zipSize;
353}
354
Reid Spencer460eb632004-10-04 10:49:41 +0000355// Decompress in one of three ways
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000356uint64_t Compressor::decompress(const char *in, unsigned size,
Reid Spencer469c34b2004-10-04 17:29:25 +0000357 OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +0000358 assert(in && "Can't decompress null buffer");
359 assert(size > 1 && "Can't decompress empty buffer");
360 assert(cb && "Can't decompress without a callback function");
361
362 uint64_t result = 0;
363
364 switch (*in++) {
365 case COMP_TYPE_BZIP2: {
Reid Spencer460eb632004-10-04 10:49:41 +0000366 // Set up the bz_stream
367 bz_stream bzdata;
368 bzdata.bzalloc = 0;
369 bzdata.bzfree = 0;
370 bzdata.opaque = 0;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000371 bzdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000372 bzdata.avail_in = size - 1;
373 bzdata.next_out = 0;
374 bzdata.avail_out = 0;
375 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
376 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
377 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
378 case BZ_MEM_ERROR: throw std::string("Out of memory");
379 case BZ_OK:
380 default:
381 break;
382 }
383
384 // Get a block of memory
Reid Spencer469c34b2004-10-04 17:29:25 +0000385 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000386 BZ2_bzDecompressEnd(&bzdata);
387 throw std::string("Can't allocate output buffer");
388 }
389
390 // Decompress it
391 int bzerr = BZ_OK;
392 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000393 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000394 BZ2_bzDecompressEnd(&bzdata);
395 throw std::string("Can't allocate output buffer");
396 }
397 }
398
399 switch (bzerr) {
400 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
401 case BZ_MEM_ERROR: throw std::string("Out of memory");
402 case BZ_DATA_ERROR: throw std::string("Data integrity error");
403 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
404 default: throw("Ooops");
405 case BZ_STREAM_END:
406 break;
407 }
408
409 // Finish
410 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
411 bzdata.total_out_lo32;
412 BZ2_bzDecompressEnd(&bzdata);
413 break;
Chris Lattnerebe989c2004-10-04 16:33:25 +0000414 }
Reid Spencer460eb632004-10-04 10:49:41 +0000415
Reid Spencerf6a0acd2004-11-25 19:38:16 +0000416 case COMP_TYPE_NONE: {
Reid Spencer047c0092004-10-04 17:45:44 +0000417 NULLCOMP_stream sdata;
Reid Spencere3c6ad72004-11-14 22:04:46 +0000418 sdata.next_in = (char*)in;
Reid Spencer460eb632004-10-04 10:49:41 +0000419 sdata.avail_in = size - 1;
Reid Spencer047c0092004-10-04 17:45:44 +0000420 NULLCOMP_init(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000421
Reid Spencer469c34b2004-10-04 17:29:25 +0000422 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000423 throw std::string("Can't allocate output buffer");
424 }
425
Reid Spencer047c0092004-10-04 17:45:44 +0000426 while (!NULLCOMP_decompress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000427 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000428 throw std::string("Can't allocate output buffer");
429 }
430 }
431
432 result = sdata.output_count;
Reid Spencer047c0092004-10-04 17:45:44 +0000433 NULLCOMP_end(&sdata);
Reid Spencer460eb632004-10-04 10:49:41 +0000434 break;
435 }
436
437 default:
438 throw std::string("Unknown type of compressed data");
439 }
440
441 return result;
442}
443
Reid Spencere3c6ad72004-11-14 22:04:46 +0000444uint64_t
445Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
446 BufferContext bc(size);
447 unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
448 out = bc.buff;
449 return result;
450}
451
452uint64_t
453Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
454 // Set up the context and writer
455 WriterContext ctxt(&out,size / 2);
456
457 // Compress everything after the magic number (which we'll alter)
458 uint64_t zipSize = Compressor::decompress(in,size,
459 WriterContext::callback, (void*)&ctxt);
460
461 if (ctxt.chunk) {
462 ctxt.write(zipSize - ctxt.written);
463 }
464 return zipSize;
465}
466
// vim: sw=2 ai