blob: e9bb781a2ca4182f924982cf5fe08f3ebbe2a691 [file] [log] [blame]
Reid Spencer460eb632004-10-04 10:49:41 +00001//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the llvm::Compressor class, an abstraction for memory
11// block compression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Config/config.h"
16#include "llvm/Support/Compressor.h"
17#include "llvm/ADT/StringExtras.h"
18#include <cassert>
19#include <string>
20
21#ifdef HAVE_BZIP2
22#include <bzlib.h>
23#endif
24
25#ifdef HAVE_ZLIB
26#include <zlib.h>
27#endif
28
29#ifndef HAVE_BZIP2
30# ifndef HAVE_ZLIB
31# warning No compression library is available!!
32# endif
33#endif
34
35namespace {
36
37inline int getdata(char*& buffer, unsigned& size,
Reid Spencer469c34b2004-10-04 17:29:25 +000038 llvm::Compressor::OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +000039 buffer = 0;
40 size = 0;
Reid Spencer469c34b2004-10-04 17:29:25 +000041 int result = (*cb)(buffer, size, context);
Reid Spencer460eb632004-10-04 10:49:41 +000042 assert(buffer != 0 && "Invalid result from Compressor callback");
43 assert(size != 0 && "Invalid result from Compressor callback");
44 return result;
45}
46
//===----------------------------------------------------------------------===//
//=== RLCOMP - very simple run length compression scheme
//=== The code below transforms the input into blocks that are either
//=== compressed or not. Each block starts with a header byte that provides
//=== the length of the block. Values < 128 are uncompressed, values > 128
//=== are compressed. The value 128 is illegal. Currently, the algorithm is
//=== not completed and is #if'd out; until it is finished, the compress and
//=== decompress routines simply copy their input to the output unchanged.
//===----------------------------------------------------------------------===//
55
// Upper bound associated with RLCOMP output (presumably a byte count -- it is
// not referenced by the code visible here). 1 << 15 == 32768.
enum { MAX_RLCOMP_OUT = 1 << 15 };
59
// Streaming state for the RLCOMP routines. The buffer fields use the same
// names and roles as the bzip2/zlib stream objects used elsewhere in this file.
struct RLCOMP_stream {
  // ---- Supplied (and refreshed) by the caller ----
  char*    next_in;      // next input byte to consume
  unsigned avail_in;     // input bytes remaining at next_in
  char*    next_out;     // where the next output byte goes
  unsigned avail_out;    // free bytes remaining at next_out

  // ---- Maintained by the RLCOMP_* functions ----
  uint64_t output_count; // running total of bytes written to the output

#if 0
  // ---- State for the (unfinished, disabled) run-length algorithm ----
  unsigned block_len;    // Length of current block
  unsigned compression;  // State of compression 0=no, 1=yes, 2=indeterminate
  char buffer[128];      // compression buffer (not used for decompress)
  unsigned buflen;       // bytes in compression buffer
  bool pending;          // is data pending to be written
  char pending_data;     // the pending data
  unsigned clen;         // length of the compressed block
#endif
};
81
82void RLCOMP_init(RLCOMP_stream* s) {
83 s->output_count = 0;
84#if 0
85 s->block_len = 0;
86 s->compression = 2;
87 s->buflen = 0;
88 s->pending = false;
89 s->pending_data = 0;
90 s->clen = 0;
91#endif
92}
93
94inline bool RLCOMP_getchar(RLCOMP_stream* s, unsigned& data) {
95#if 0
96 if (s->avail_in) {
97 data = *s->next_in++;
98 s->avail_in--;
99 return true;
100 }
101#endif
102 return false;
103}
104
105inline bool RLCOMP_putchar(RLCOMP_stream* s, unsigned data) {
106#if 0
107 if (s->avail_out) {
108 *s->next_out++ = data;
109 s->avail_out--;
110 s->output_count++;
111 return true;
112 } else {
113 s->pending = true;
114 s->pending_data = data;
115 return false;
116 }
117#else
118 return false;
119#endif
120}
121
122bool RLCOMP_compress(RLCOMP_stream* s) {
123 assert(s && "Invalid RLCOMP_stream");
124 assert(s->next_in != 0);
125 assert(s->next_out != 0);
126 assert(s->avail_in >= 1);
127 assert(s->avail_out >= 1);
128
129#if 0
130
131 // Handle pending data from the last time in
132 if (s->pending) {
133 RLCOMP_putchar(s,s->pending_data);
134 s->pending = false;
135 }
136
137 unsigned c = 0;
138 unsigned lastc = 0;
139 // Handle the degenerate len=1 case
140 if (!RLCOMP_getchar(s,lastc)) {
141 RLCOMP_putchar(s,1);
142 return RLCOMP_putchar(s,lastc);
143 }
144
145 while (RLCOMP_getchar(s,c)) {
146 switch(s->compression) {
147 case 0:
148 if (lastc == c) {
149 s->compression = 1;
150 s->clen = 2 ;
151 } else {
152 if (!RLCOMP_putchar(s, c))
153 return false;
154 }
155 break;
156
157 case 1:
158 if (lastc != c) {
159 s->compression = 2;
160 if (!RLCOMP_putchar(s, s->clen))
161 return false;
162 } else {
163 s->clen++;
164 }
165 break;
166
167 case 2:
168 break;
169 }
170 lastc = c;
171 }
172#endif
173 if (s->avail_out >= s->avail_in) {
174 ::memcpy(s->next_out, s->next_in, s->avail_in);
175 s->output_count += s->avail_in;
176 s->avail_out -= s->avail_in;
177 s->next_in += s->avail_in;
178 s->avail_in = 0;
179 return true;
180 } else {
181 ::memcpy(s->next_out, s->next_in, s->avail_out);
182 s->output_count += s->avail_out;
183 s->avail_in -= s->avail_out;
184 s->next_in += s->avail_out;
185 s->avail_out = 0;
186 return false;
187 }
188}
189
190bool RLCOMP_decompress(RLCOMP_stream* s) {
191 assert(s && "Invalid RLCOMP_stream");
192 assert(s->next_in != 0);
193 assert(s->next_out != 0);
194 assert(s->avail_in >= 1);
195 assert(s->avail_out >= 1);
196
197#if 0
198 unsigned c = 0;
199 while (RLCOMP_getchar(s,c)) {
200 switch(s->compression) {
201 case 0: // This is not a compressed block
202 s->block_len--;
203 if (!RLCOMP_putchar(s,c))
204 return false;
205 break;
206
207 case 1: // This is a comperssed block
208 while (s->block_len-- > 0)
209 if (!RLCOMP_putchar(s,c))
210 return false;
211 break;
212
213 case 2: // This is the length field
214 if (c < 128) {
215 s->compression = 0;
216 s->block_len = c;
217 } else {
218 s->compression = 1;
219 s->block_len = c - 128;
220 }
221 continue;
222
223 default: // oops!
224 throw std::string("Invalid compression state");
225 }
226 if (s->block_len <= 0)
227 s->compression = 2;
228 }
229
230 if (s->repeat > 0)
231 throw std::string("Invalid compression state");
232#endif
233 if (s->avail_out >= s->avail_in) {
234 ::memcpy(s->next_out, s->next_in, s->avail_in);
235 s->output_count += s->avail_in;
236 s->avail_out -= s->avail_in;
237 s->next_in += s->avail_in;
238 s->avail_in = 0;
239 return true;
240 } else {
241 ::memcpy(s->next_out, s->next_in, s->avail_out);
242 s->output_count += s->avail_out;
243 s->avail_in -= s->avail_out;
244 s->next_in += s->avail_out;
245 s->avail_out = 0;
246 return false;
247 }
248}
249
250void RLCOMP_end(RLCOMP_stream* strm) {
251}
252
253}
254
255namespace llvm {
256
257// Compress in one of three ways
Reid Spencer469c34b2004-10-04 17:29:25 +0000258uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
259 Algorithm hint, void* context ) {
Reid Spencer460eb632004-10-04 10:49:41 +0000260 assert(in && "Can't compress null buffer");
261 assert(size && "Can't compress empty buffer");
262 assert(cb && "Can't compress without a callback function");
263
264 uint64_t result = 0;
265
266 switch (hint) {
267 case COMP_TYPE_BZIP2: {
268#if defined(HAVE_BZIP2)
269 // Set up the bz_stream
270 bz_stream bzdata;
271 bzdata.bzalloc = 0;
272 bzdata.bzfree = 0;
273 bzdata.opaque = 0;
274 bzdata.next_in = in;
275 bzdata.avail_in = size;
276 bzdata.next_out = 0;
277 bzdata.avail_out = 0;
278 switch ( BZ2_bzCompressInit(&bzdata, 9, 0, 0) ) {
279 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
280 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
281 case BZ_MEM_ERROR: throw std::string("Out of memory");
282 case BZ_OK:
283 default:
284 break;
285 }
286
287 // Get a block of memory
Reid Spencer469c34b2004-10-04 17:29:25 +0000288 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000289 BZ2_bzCompressEnd(&bzdata);
290 throw std::string("Can't allocate output buffer");
291 }
292
293 // Put compression code in first byte
294 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
295 bzdata.avail_out--;
296
297 // Compress it
298 int bzerr = BZ_FINISH_OK;
299 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000300 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000301 BZ2_bzCompressEnd(&bzdata);
302 throw std::string("Can't allocate output buffer");
303 }
304 }
305 switch (bzerr) {
306 case BZ_SEQUENCE_ERROR:
307 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
308 case BZ_FINISH_OK:
309 case BZ_STREAM_END: break;
310 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
311 }
312
313 // Finish
314 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
315 bzdata.total_out_lo32 + 1;
316
317 BZ2_bzCompressEnd(&bzdata);
318 break;
319#else
320 // FALL THROUGH
321#endif
322 }
323
324 case COMP_TYPE_ZLIB: {
325#if defined(HAVE_ZLIB)
326 z_stream zdata;
327 zdata.zalloc = Z_NULL;
328 zdata.zfree = Z_NULL;
329 zdata.opaque = Z_NULL;
330 zdata.next_in = reinterpret_cast<Bytef*>(in);
331 zdata.avail_in = size;
332 if (Z_OK != deflateInit(&zdata,Z_BEST_COMPRESSION))
333 throw std::string(zdata.msg ? zdata.msg : "zlib error");
334
Reid Spencer469c34b2004-10-04 17:29:25 +0000335 if (0 != getdata((char*&)(zdata.next_out), zdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000336 deflateEnd(&zdata);
337 throw std::string("Can't allocate output buffer");
338 }
339
340 (*zdata.next_out++) = COMP_TYPE_ZLIB;
341 zdata.avail_out--;
342
343 int flush = 0;
344 while ( Z_OK == deflate(&zdata,0) && zdata.avail_out == 0) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000345 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000346 deflateEnd(&zdata);
347 throw std::string("Can't allocate output buffer");
348 }
349 }
350
351 while ( Z_STREAM_END != deflate(&zdata, Z_FINISH)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000352 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000353 deflateEnd(&zdata);
354 throw std::string("Can't allocate output buffer");
355 }
356 }
357
358 result = static_cast<uint64_t>(zdata.total_out) + 1;
359 deflateEnd(&zdata);
360 break;
361
362#else
363 // FALL THROUGH
364#endif
365 }
366
367 case COMP_TYPE_SIMPLE: {
368 RLCOMP_stream sdata;
369 sdata.next_in = in;
370 sdata.avail_in = size;
371 RLCOMP_init(&sdata);
372
Reid Spencer469c34b2004-10-04 17:29:25 +0000373 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000374 throw std::string("Can't allocate output buffer");
375 }
376
377 *(sdata.next_out++) = COMP_TYPE_SIMPLE;
378 sdata.avail_out--;
379
380 while (!RLCOMP_compress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000381 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000382 throw std::string("Can't allocate output buffer");
383 }
384 }
385
386 result = sdata.output_count + 1;
387 RLCOMP_end(&sdata);
388 break;
389 }
390 default:
391 throw std::string("Invalid compression type hint");
392 }
393 return result;
394}
395
396// Decompress in one of three ways
397uint64_t Compressor::decompress(char *in, unsigned size,
Reid Spencer469c34b2004-10-04 17:29:25 +0000398 OutputDataCallback* cb, void* context) {
Reid Spencer460eb632004-10-04 10:49:41 +0000399 assert(in && "Can't decompress null buffer");
400 assert(size > 1 && "Can't decompress empty buffer");
401 assert(cb && "Can't decompress without a callback function");
402
403 uint64_t result = 0;
404
405 switch (*in++) {
406 case COMP_TYPE_BZIP2: {
407#if !defined(HAVE_BZIP2)
408 throw std::string("Can't decompress BZIP2 data");
409#else
410 // Set up the bz_stream
411 bz_stream bzdata;
412 bzdata.bzalloc = 0;
413 bzdata.bzfree = 0;
414 bzdata.opaque = 0;
415 bzdata.next_in = in;
416 bzdata.avail_in = size - 1;
417 bzdata.next_out = 0;
418 bzdata.avail_out = 0;
419 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
420 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
421 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
422 case BZ_MEM_ERROR: throw std::string("Out of memory");
423 case BZ_OK:
424 default:
425 break;
426 }
427
428 // Get a block of memory
Reid Spencer469c34b2004-10-04 17:29:25 +0000429 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000430 BZ2_bzDecompressEnd(&bzdata);
431 throw std::string("Can't allocate output buffer");
432 }
433
434 // Decompress it
435 int bzerr = BZ_OK;
436 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000437 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000438 BZ2_bzDecompressEnd(&bzdata);
439 throw std::string("Can't allocate output buffer");
440 }
441 }
442
443 switch (bzerr) {
444 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
445 case BZ_MEM_ERROR: throw std::string("Out of memory");
446 case BZ_DATA_ERROR: throw std::string("Data integrity error");
447 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
448 default: throw("Ooops");
449 case BZ_STREAM_END:
450 break;
451 }
452
453 // Finish
454 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
455 bzdata.total_out_lo32;
456 BZ2_bzDecompressEnd(&bzdata);
457 break;
Reid Spencer460eb632004-10-04 10:49:41 +0000458#endif
Chris Lattnerebe989c2004-10-04 16:33:25 +0000459 }
Reid Spencer460eb632004-10-04 10:49:41 +0000460
461 case COMP_TYPE_ZLIB: {
462#if !defined(HAVE_ZLIB)
463 throw std::string("Can't decompress ZLIB data");
464#else
465 z_stream zdata;
466 zdata.zalloc = Z_NULL;
467 zdata.zfree = Z_NULL;
468 zdata.opaque = Z_NULL;
469 zdata.next_in = reinterpret_cast<Bytef*>(in);
470 zdata.avail_in = size - 1;
471 if ( Z_OK != inflateInit(&zdata))
472 throw std::string(zdata.msg ? zdata.msg : "zlib error");
473
Reid Spencer469c34b2004-10-04 17:29:25 +0000474 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000475 inflateEnd(&zdata);
476 throw std::string("Can't allocate output buffer");
477 }
478
479 int zerr = Z_OK;
480 while (Z_OK == (zerr = inflate(&zdata,0))) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000481 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000482 inflateEnd(&zdata);
483 throw std::string("Can't allocate output buffer");
484 }
485 }
486
487 if (zerr != Z_STREAM_END)
488 throw std::string(zdata.msg?zdata.msg:"zlib error");
489
490 result = static_cast<uint64_t>(zdata.total_out);
491 inflateEnd(&zdata);
492 break;
493#endif
494 }
495
496 case COMP_TYPE_SIMPLE: {
497 RLCOMP_stream sdata;
498 sdata.next_in = in;
499 sdata.avail_in = size - 1;
500 RLCOMP_init(&sdata);
501
Reid Spencer469c34b2004-10-04 17:29:25 +0000502 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000503 throw std::string("Can't allocate output buffer");
504 }
505
506 while (!RLCOMP_decompress(&sdata)) {
Reid Spencer469c34b2004-10-04 17:29:25 +0000507 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
Reid Spencer460eb632004-10-04 10:49:41 +0000508 throw std::string("Can't allocate output buffer");
509 }
510 }
511
512 result = sdata.output_count;
513 RLCOMP_end(&sdata);
514 break;
515 }
516
517 default:
518 throw std::string("Unknown type of compressed data");
519 }
520
521 return result;
522}
523
524}
525
526// vim: sw=2 ai