blob: 2f97a33d52d3b3558c53d5bef4fb1d76010d0325 [file] [log] [blame]
"""binhex - Macintosh binhex compression/decompression
easy interface:
binhex(inputfilename, outputfilename)
hexbin(inputfilename, outputfilename)
"""
6
7#
8# Jack Jansen, CWI, August 1995.
9#
10# The module is supposed to be as compatible as possible. Especially the
11# easy interface should work "as expected" on any platform.
12# XXXX Note: currently, textfiles appear in mac-form on all platforms.
13# We seem to lack a simple character-translate in python.
14# (we should probably use ISO-Latin-1 on all but the mac platform).
# XXXX The simple routines are too simple: they expect to hold the complete
# files in-core. Should be fixed.
17# XXXX It would be nice to handle AppleDouble format on unix (for servers serving
18# macs).
19# XXXX I don't understand what happens when you get 0x90 times the same byte on
20# input. The resulting code (xx 90 90) would appear to be interpreted as an
21# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
22#
23import sys
24import os
25import struct
26import string
27import binascii
28
# Set DEBUG to a true value to get diagnostic prints and to have
# _Rlecoderengine.write() copy everything it is handed to '@binhex.dbg.out'.
DEBUG=0
if DEBUG:
    testf=open('@binhex.dbg.out', 'w')

# The exception raised by this module (a string exception).
Error = 'binhex.Error'

# States (what have we written)
# The same three values are used by HexBin to track what has been read.
[_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3)

# Various constants
REASONABLY_LARGE=32768  # Minimal amount we pass the rle-coder
LINELEN=48              # What we pass to hqx-coder at once
                        # *NOTE* Must be divisible by 3!
RUNCHAR=chr(0x90)       # run-length introducer

#
# The code is currently byte-order dependent
# (header fields and CRCs are packed with native struct formats, so the
# module refuses to load unless a native int is stored most-significant
# byte first).
if struct.pack('i', 0177) != '\0\0\0\177':
    raise ImportError, 'Module binhex is big-endian only'
48
49#
50# Workarounds for non-mac machines.
51if os.name == 'mac':
52 import macfs
53
    def FInfo():
        """Return a new macfs.FInfo object (mac counterpart of the FInfo class)."""
        return macfs.FInfo()
56
57 def getfileinfo(name):
58 finfo = macfs.FSSpec(name).GetFInfo()
59 dir, file = os.path.split(name)
60 # XXXX Get resource/data sizes
61 fp = open(name, 'rb')
62 fp.seek(0, 2)
63 dlen = fp.tell()
64 fp = open(name, '*rb')
65 fp.seek(0, 2)
66 rlen = fp.tell()
67 return file, finfo, dlen, rlen
68
69 def openrsrc(name, *mode):
70 if mode:
71 mode = mode[0]
72 else:
73 mode = 'rb'
74 mode = '*' + mode
75 return open(name, mode)
76
77else:
78 #
    # Glue code for non-macintosh usage
80 #
81 import regsub
82
83 class FInfo:
84 def __init__(self):
85 self.Type = '????'
86 self.Creator = '????'
87 self.Flags = 0
88
89 def getfileinfo(name):
90 finfo = FInfo()
91 # Quick check for textfile
92 fp = open(name)
93 data = open(name).read(256)
94 for c in data:
95 if not c in string.whitespace and (c<' ' or ord(c) > 0177):
96 break
97 else:
98 finfo.Type = 'TEXT'
99 fp.seek(0, 2)
100 dsize = fp.tell()
101 fp.close()
102 dir, file = os.path.split(name)
103 file = regsub.sub(':', '-', file)
104 return file, finfo, dsize, 0
105
106 class openrsrc:
107 def __init__(self, *args):
108 pass
109
110 def read(self, *args):
111 return ''
112
113 def write(self, *args):
114 pass
115
116 def close(self):
117 pass
118
class _Hqxcoderengine:
    """Buffer data and hand it to binascii.b2a_hqx one LINELEN slice at a time.

    Each full slice is written to ofp as the encoded text plus a newline.
    """

    def __init__(self, ofp):
        self.data = ''
        self.ofp = ofp

    def write(self, data):
        """Add data to the buffer and emit lines while more than LINELEN remain."""
        self.data = self.data + data
        while len(self.data) > LINELEN:
            head, tail = self.data[:LINELEN], self.data[LINELEN:]
            self.ofp.write(binascii.b2a_hqx(head) + '\n')
            self.data = tail

    def close(self):
        """Encode what is still buffered, write the final ':' line, close ofp."""
        leftover = self.data
        if leftover:
            self.ofp.write(binascii.b2a_hqx(leftover))
        self.ofp.write(':\n')
        self.ofp.close()
138
class _Rlecoderengine:
    """Collect data and run it through binascii.rlecode_hqx in big pieces.

    Nothing is passed on to ofp until at least REASONABLY_LARGE bytes have
    been collected (or close() is called).
    """

    def __init__(self, ofp):
        self.data = ''
        self.ofp = ofp

    def write(self, data):
        """Buffer data; encode and forward the buffer once it is big enough."""
        if DEBUG:
            testf.write(data) # XXXX
        self.data = self.data + data
        if len(self.data) >= REASONABLY_LARGE:
            self.ofp.write(binascii.rlecode_hqx(self.data))
            self.data = ''

    def close(self):
        """Encode and forward whatever is still buffered, then close ofp."""
        if self.data:
            self.ofp.write(binascii.rlecode_hqx(self.data))
        self.ofp.close()
161
162class BinHex:
163 def __init__(self, (name, finfo, dlen, rlen), ofp):
164 if type(ofp) == type(''):
165 ofname = ofp
166 ofp = open(ofname, 'w')
167 if os.name == 'mac':
168 fss = macfs.FSSpec(ofname)
169 fss.SetCreatorType('BnHq', 'TEXT')
170 ofp.write('(This file may be decompressed with BinHex 4.0)\n\n:')
171 hqxer = _Hqxcoderengine(ofp)
172 self.ofp = _Rlecoderengine(hqxer)
173 self.crc = 0
174 if finfo == None:
175 finfo = FInfo()
176 self.dlen = dlen
177 self.rlen = rlen
178 self._writeinfo(name, finfo)
179 self.state = _DID_HEADER
180
181 def _writeinfo(self, name, finfo):
182 if DEBUG:
183 print 'binhex info:', name, finfo.Type, finfo.Creator, self.dlen, self.rlen
184 name = name
185 nl = len(name)
186 if nl > 63:
187 raise Error, 'Filename too long'
188 d = chr(nl) + name + '\0'
189 d2 = finfo.Type + finfo.Creator
190 d3 = struct.pack('h', finfo.Flags)
191 d4 = struct.pack('ii', self.dlen, self.rlen)
192 info = d + d2 + d3 + d4
193 self._write(info)
194 self._writecrc()
195
196 def _write(self, data):
197 self.crc = binascii.crc_hqx(data, self.crc)
198 self.ofp.write(data)
199
200 def _writecrc(self):
Jack Jansen685e16d1995-10-03 14:41:15 +0000201## self.crc = binascii.crc_hqx('\0\0', self.crc) # XXXX Should this be here??
Jack Jansenfcdffea1995-08-07 14:36:51 +0000202 self.ofp.write(struct.pack('h', self.crc))
203 self.crc = 0
204
205 def write(self, data):
206 if self.state != _DID_HEADER:
207 raise Error, 'Writing data at the wrong time'
208 self.dlen = self.dlen - len(data)
209 self._write(data)
210
211 def close_data(self):
212 if self.dlen <> 0:
213 raise Error, 'Incorrect data size, diff='+`self.rlen`
214 self._writecrc()
215 self.state = _DID_DATA
216
217 def write_rsrc(self, data):
218 if self.state < _DID_DATA:
219 self.close_data()
220 if self.state != _DID_DATA:
221 raise Error, 'Writing resource data at the wrong time'
222 self.rlen = self.rlen - len(data)
223 self._write(data)
224
225 def close(self):
226 if self.state < _DID_DATA:
227 self.close_data()
228 if self.state != _DID_DATA:
229 raise Error, 'Close at the wrong time'
230 if self.rlen <> 0:
231 raise Error, "Incorrect resource-datasize, diff="+`self.rlen`
232 self._writecrc()
233 self.ofp.close()
234 self.state = None
235
def binhex(inp, out):
    """(infilename, outfilename) - Create binhex-encoded copy of a file

    Both forks are copied in fixed-size pieces, so the file never has to
    fit in memory as a whole, and each input file is closed again even
    when encoding fails halfway.
    """
    chunksize = 128000
    finfo = getfileinfo(inp)
    ofp = BinHex(finfo, out)

    ifp = open(inp, 'rb')
    try:
        # XXXX Do textfile translation on non-mac systems
        while 1:
            d = ifp.read(chunksize)
            if not d:
                break
            ofp.write(d)
        ofp.close_data()
    finally:
        ifp.close()

    ifp = openrsrc(inp, 'rb')
    try:
        while 1:
            d = ifp.read(chunksize)
            if not d:
                break
            ofp.write_rsrc(d)
        ofp.close()
    finally:
        ifp.close()
253
254class _Hqxdecoderengine:
255 """Read data via the decoder in 4-byte chunks"""
256
257 def __init__(self, ifp):
258 self.ifp = ifp
259 self.eof = 0
260
Jack Jansen685e16d1995-10-03 14:41:15 +0000261 def read(self, totalwtd):
Jack Jansenfcdffea1995-08-07 14:36:51 +0000262 """Read at least wtd bytes (or until EOF)"""
263 decdata = ''
Jack Jansen685e16d1995-10-03 14:41:15 +0000264 wtd = totalwtd
Jack Jansenfcdffea1995-08-07 14:36:51 +0000265 #
266 # The loop here is convoluted, since we don't really now how much
267 # to decode: there may be newlines in the incoming data.
268 while wtd > 0:
269 if self.eof: return decdata
270 wtd = ((wtd+2)/3)*4
271 data = self.ifp.read(wtd)
272 #
273 # Next problem: there may not be a complete number of bytes in what we
274 # pass to a2b. Solve by yet another loop.
275 #
276 while 1:
277 try:
278 decdatacur, self.eof = binascii.a2b_hqx(data)
279 if self.eof: print 'EOF'
280 break
281 except binascii.Incomplete:
282 pass
283 newdata = self.ifp.read(1)
284 if not newdata:
285 raise Error, 'Premature EOF on binhex file'
286 data = data + newdata
287 decdata = decdata + decdatacur
Jack Jansen685e16d1995-10-03 14:41:15 +0000288 wtd = totalwtd - len(decdata)
Jack Jansenfcdffea1995-08-07 14:36:51 +0000289 if not decdata and not self.eof:
290 raise Error, 'Premature EOF on binhex file'
291 return decdata
292
293 def close(self):
294 self.ifp.close()
295
296class _Rledecoderengine:
297 """Read data via the RLE-coder"""
298
299 def __init__(self, ifp):
300 self.ifp = ifp
301 self.pre_buffer = ''
302 self.post_buffer = ''
303 self.eof = 0
304
305 def read(self, wtd):
306 if wtd > len(self.post_buffer):
307 self._fill(wtd-len(self.post_buffer))
308 rv = self.post_buffer[:wtd]
309 self.post_buffer = self.post_buffer[wtd:]
310 print 'WTD', wtd, 'GOT', len(rv)
311 return rv
312
313 def _fill(self, wtd):
314 #
315 # Obfuscated code ahead. We keep at least one byte in the pre_buffer,
316 # so we don't stumble over an orphaned RUNCHAR later on. If the
317 # last or second-last char is a RUNCHAR we keep more bytes.
318 #
319 self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+2)
320 if self.ifp.eof:
321 self.post_buffer = self.post_buffer + \
322 binascii.rledecode_hqx(self.pre_buffer)
323 self.pre_buffer = ''
324 return
325
326 lastrle = string.rfind(self.pre_buffer, RUNCHAR)
327 if lastrle > 0 and lastrle == len(self.pre_buffer)-1:
328 # Last byte is an RLE, keep two bytes
329 mark = len(self.pre_buffer)-2
330 elif lastrle > 0 and lastrle == len(self.pre_buffer)-2:
331 # second-last byte is an RLE. Decode all.
332 mark = len(self.pre_buffer)
333 else:
334 mark = len(self.pre_buffer)-1
335 self.post_buffer = self.post_buffer + \
336 binascii.rledecode_hqx(self.pre_buffer[:mark])
337 self.pre_buffer = self.pre_buffer[mark:]
338
339 def close(self):
340 self.ifp.close()
341
342class HexBin:
343 def __init__(self, ifp):
344 if type(ifp) == type(''):
345 ifp = open(ifp)
346 #
347 # Find initial colon.
348 #
349 while 1:
350 ch = ifp.read(1)
351 if not ch:
352 raise Error, "No binhex data found"
353 if ch == ':':
354 break
355 if ch != '\n':
356 dummy = ifp.readline()
357 if DEBUG:
358 print 'SKIP:', ch+dummy
359
360 hqxifp = _Hqxdecoderengine(ifp)
361 self.ifp = _Rledecoderengine(hqxifp)
362 self.crc = 0
363 self._readheader()
364
365 def _read(self, len):
366 data = self.ifp.read(len)
367 self.crc = binascii.crc_hqx(data, self.crc)
368 return data
369
370 def _checkcrc(self):
371 filecrc = struct.unpack('h', self.ifp.read(2))[0] & 0xffff
Jack Jansen685e16d1995-10-03 14:41:15 +0000372## self.crc = binascii.crc_hqx('\0\0', self.crc) # XXXX Is this needed??
Jack Jansenfcdffea1995-08-07 14:36:51 +0000373 self.crc = self.crc & 0xffff
374 if DEBUG:
375 print 'DBG CRC %x %x'%(self.crc, filecrc)
Jack Jansen685e16d1995-10-03 14:41:15 +0000376 if filecrc != self.crc:
377 raise Error, 'CRC error, computed %x, read %x'%(self.crc, filecrc)
Jack Jansenfcdffea1995-08-07 14:36:51 +0000378 self.crc = 0
379
380 def _readheader(self):
381 len = self._read(1)
382 fname = self._read(ord(len))
383 rest = self._read(1+4+4+2+4+4)
384 self._checkcrc()
385
386 type = rest[1:5]
387 creator = rest[5:9]
388 flags = struct.unpack('h', rest[9:11])[0]
389 self.dlen = struct.unpack('l', rest[11:15])[0]
390 self.rlen = struct.unpack('l', rest[15:19])[0]
391
392 if DEBUG:
393 print 'DATA, RLEN', self.dlen, self.rlen
394
395 self.FName = fname
396 self.FInfo = FInfo()
397 self.FInfo.Creator = creator
398 self.FInfo.Type = type
399 self.FInfo.Flags = flags
400
401 self.state = _DID_HEADER
402
403 def read(self, *n):
404 if self.state != _DID_HEADER:
405 raise Error, 'Read data at wrong time'
406 if n:
407 n = n[0]
408 n = min(n, self.dlen)
409 else:
410 n = self.dlen
411 self.dlen = self.dlen - n
412 return self._read(n)
413
414 def close_data(self):
415 if self.state != _DID_HEADER:
416 raise Error, 'close_data at wrong time'
417 if self.dlen:
418 dummy = self._read(self.dlen)
419 self._checkcrc()
420 self.state = _DID_DATA
421
422 def read_rsrc(self, *n):
423 if self.state == _DID_HEADER:
424 self.close_data()
425 if self.state != _DID_DATA:
426 raise Error, 'Read resource data at wrong time'
427 if n:
428 n = n[0]
429 n = min(n, self.rlen)
430 else:
431 n = self.rlen
432 self.rlen = self.rlen - n
433 return self._read(n)
434
435 def close(self):
436 if self.rlen:
437 dummy = self.read_rsrc(self.rlen)
438 self._checkcrc()
439 self.state = _DID_RSRC
440 self.ifp.close()
441
def hexbin(inp, out):
    """(infilename, outfilename) - Decode binhexed file

    When out is empty the name stored in the binhex file is used.  Both
    forks are copied in fixed-size pieces, so the file never has to fit in
    memory as a whole, and each output file is closed again even when
    decoding fails halfway.
    """
    chunksize = 128000
    ifp = HexBin(inp)
    finfo = ifp.FInfo
    if not out:
        out = ifp.FName
    if os.name == 'mac':
        ofss = macfs.FSSpec(out)
        out = ofss.as_pathname()

    ofp = open(out, 'wb')
    try:
        # XXXX Do translation on non-mac systems
        while 1:
            d = ifp.read(chunksize)
            if not d:
                break
            ofp.write(d)
    finally:
        ofp.close()
    ifp.close_data()

    # The resource file is only opened when there is something to put in.
    d = ifp.read_rsrc(chunksize)
    if d:
        ofp = openrsrc(out, 'wb')
        try:
            while d:
                ofp.write(d)
                d = ifp.read_rsrc(chunksize)
        finally:
            ofp.close()

    if os.name == 'mac':
        nfinfo = ofss.GetFInfo()
        nfinfo.Creator = finfo.Creator
        nfinfo.Type = finfo.Type
        nfinfo.Flags = finfo.Flags
        ofss.SetFInfo(nfinfo)

    ifp.close()
473
def _test():
    """Decode the file picked by the user (mac) or named in sys.argv[1].

    The result is written next to the input with '.unpacked' appended.
    Exits with status 0 when the conversion went through and with status 2
    when no filename was given on the command line.
    """
    if os.name == 'mac':
        fss, ok = macfs.PromptGetFile('File to convert:')
        if not ok:
            sys.exit(0)
        fname = fss.as_pathname()
    else:
        if len(sys.argv) < 2:
            sys.stderr.write('usage: %s file\n' % sys.argv[0])
            sys.exit(2)
        fname = sys.argv[1]
    #binhex(fname, fname+'.hqx')
    #hexbin(fname+'.hqx', fname+'.viahqx')
    hexbin(fname, fname+'.unpacked')
    # A completed run is a success, so do not report failure to the caller.
    sys.exit(0)

if __name__ == '__main__':
    _test()
489