| import StringIO |
| import regex |
| import string |
| import eexec |
| import types |
| from psOperators import * |
| |
| |
# Characters that start a special token. '/' also introduces a special
# token (a literal name) but is handled separately in the tokenizer.
ps_special = '()<>[]{}%'  # / is one too, but we take care of that one differently

# NOTE: all patterns below use the long-obsolete 'regex' module, whose
# match() returns the LENGTH of the match (negative on failure), not a
# match object.
whitespace = string.whitespace
skipwhiteRE = regex.compile("[%s]*" % whitespace)

# Matches a run of "ordinary" characters: everything up to the next
# delimiter, comment sign or whitespace (used for names and numbers).
endofthingPat = "[^][(){}<>/%s%s]*" % ('%', whitespace)
endofthingRE = regex.compile(endofthingPat)

# A comment runs from '%' to the end of the line.
commentRE = regex.compile("%[^\n\r]*")

# Matches a parenthesized PostScript string, allowing escaped parens and
# one level of nesting; deeper nesting is not handled correctly.
# XXX This not entirely correct:
stringPat = """
	(
		\(
			\(
				[^()]* \\\\ [()]
			\)
			\|
			\(
				[^()]* ( [^()]* )
			\)
		\)*
		[^()]*
	)
"""
# Collapse all whitespace out of the readable pattern above before compiling.
stringPat = string.join(string.split(stringPat), '')
stringRE = regex.compile(stringPat)

# A hex string: '<' ... '>' containing hex digits and whitespace.
hexstringRE = regex.compile("<[%s0-9A-Fa-f]*>" % whitespace)

# String exceptions (pre-exception-class Python style).
ps_tokenerror = 'ps_tokenerror'
ps_error = 'ps_error'
| |
class PSTokenizer(StringIO.StringIO):

	"""Tokenizer for PostScript data.

	Subclasses StringIO and reads the stream through StringIO's internal
	attributes: self.buf (the data), self.pos (current offset) and
	self.len (total length).
	"""

	def getnexttoken(self,
			# localize some stuff, for performance
			len = len,
			ps_special = ps_special,
			stringmatch = stringRE.match,
			hexstringmatch = hexstringRE.match,
			commentmatch = commentRE.match,
			endmatch = endofthingRE.match,
			whitematch = skipwhiteRE.match):
		"""Return the next (tokentype, token) pair, or (None, None) at
		end of data.

		tokentype is the name of a PSInterpreter handler method
		('do_special', 'do_comment', 'do_string', 'do_hexstring',
		'do_literal') or '' for a plain token.  Raises ps_tokenerror on
		malformed input.
		"""
		# Skip leading whitespace; whitematch() returns the matched length.
		self.pos = self.pos + whitematch(self.buf, self.pos)
		if self.pos >= self.len:
			return None, None
		pos = self.pos
		buf = self.buf
		char = buf[pos]
		if char in ps_special:
			if char in '{}[]':
				# Single-character delimiters are tokens by themselves.
				tokentype = 'do_special'
				token = char
			elif char == '%':
				tokentype = 'do_comment'
				commentlen = commentmatch(buf, pos)
				token = buf[pos:pos+commentlen]
			elif char == '(':
				tokentype = 'do_string'
				strlen = stringmatch(buf, pos)
				# old 'regex' module: a negative length means no match
				if strlen < 0:
					raise ps_tokenerror, 'bad string at character %d' % pos
				token = buf[pos:pos+strlen]
			elif char == '<':
				tokentype = 'do_hexstring'
				strlen = hexstringmatch(buf, pos)
				if strlen < 0:
					raise ps_tokenerror, 'bad hexstring at character %d' % pos
				token = buf[pos:pos+strlen]
			else:
				raise ps_tokenerror, 'bad token at character %d' % pos
		else:
			if char == '/':
				# Literal name: the '/' itself plus the following run
				# of ordinary characters.
				tokentype = 'do_literal'
				endofthing = endmatch(buf, pos + 1) + 1
			else:
				# Plain executable token (name or number); the
				# interpreter's do_token() decides which.
				tokentype = ''
				endofthing = endmatch(buf, pos)
			if endofthing <= 0:
				raise ps_tokenerror, 'bad token at character %d' % pos
			token = buf[pos:pos + endofthing]
		self.pos = pos + len(token)
		return tokentype, token

	def skipwhite(self, whitematch = skipwhiteRE.match):
		"""Advance self.pos past any whitespace."""
		self.pos = self.pos + whitematch(self.buf, self.pos)

	def starteexec(self):
		"""Switch to reading the eexec-encrypted portion of the data.

		Decrypts the remainder of the buffer (key 55665) and continues
		tokenizing the decrypted text; the original buffer is kept in
		self.dirtybuf so stopeexec() can restore it.
		"""
		self.pos = self.pos + 1
		#self.skipwhite()
		self.dirtybuf = self.buf[self.pos:]
		self.buf, R = eexec.Decrypt(self.dirtybuf, 55665)
		self.len = len(self.buf)
		# Skip the first four decrypted bytes — presumably the random
		# eexec padding bytes; TODO confirm against the Type 1 spec.
		self.pos = 4

	def stopeexec(self):
		"""Restore the original (encrypted) buffer after an eexec section."""
		if not hasattr(self, 'dirtybuf'):
			# not inside an eexec section; nothing to do
			return
		self.buf = self.dirtybuf
		del self.dirtybuf

	def flush(self):
		"""Fold any pending writes (StringIO's buflist) into self.buf."""
		if self.buflist:
			self.buf = self.buf + string.join(self.buflist, '')
			self.buflist = []
| |
| |
class PSInterpreter(PSOperators):

	"""A minimal PostScript interpreter, sufficient to execute Type 1
	font programs.

	State: self.stack is the operand stack, self.dictstack the
	dictionary stack (systemdict, userdict).  Operators come from
	ps_* methods inherited from PSOperators (see suckoperators).
	"""

	def __init__(self):
		# dictstack[0] is systemdict, dictstack[1] is userdict.
		systemdict = {}
		userdict = {}
		self.dictstack = [systemdict, userdict]
		self.stack = []
		# proclevel counts '{' nesting; inside a procedure body nothing
		# is executed, objects are just collected (see handle_object).
		self.proclevel = 0
		self.procmark = ps_procmark()
		self.fillsystemdict()

	def fillsystemdict(self):
		"""Populate systemdict with built-in objects and all ps_* operators."""
		systemdict = self.dictstack[0]
		# '[' pushes a mark, ']' builds an array from objects above the mark.
		systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
		systemdict[']'] = ps_operator(']', self.do_makearray)
		systemdict['true'] = ps_boolean(1)
		systemdict['false'] = ps_boolean(0)
		systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
		systemdict['FontDirectory'] = ps_dict({})
		self.suckoperators(systemdict, self.__class__)

	def suckoperators(self, systemdict, klass):
		"""Register every ps_* method of klass (and its bases) as an
		operator named after the method, minus the 'ps_' prefix."""
		for name in dir(klass):
			attr = getattr(self, name)
			if callable(attr) and name[:3] == 'ps_':
				name = name[3:]
				systemdict[name] = ps_operator(name, attr)
		# Recurse into base classes; getattr(self, name) always yields
		# the most-derived bound method, so re-registration is harmless.
		for baseclass in klass.__bases__:
			self.suckoperators(systemdict, baseclass)

	def interpret(self, data, getattr = getattr):
		"""Tokenize and execute a PostScript program given as a string.

		On any error, prints the 50 characters of context before and
		after the failure point, then re-raises.
		"""
		tokenizer = self.tokenizer = PSTokenizer(data)
		getnexttoken = tokenizer.getnexttoken
		do_token = self.do_token
		handle_object = self.handle_object
		try:
			while 1:
				tokentype, token = getnexttoken()
				#print token
				if not token:
					break
				if tokentype:
					# special token: dispatch to the matching do_* method
					handler = getattr(self, tokentype)
					object = handler(token)
				else:
					# plain token: number or executable name
					object = do_token(token)
				if object is not None:
					handle_object(object)
			tokenizer.close()
			# self.tokenizer still set here means we didn't finish cleanly
			self.tokenizer = None
		finally:
			if self.tokenizer is not None:
				print 'ps error:\n- - - - - - -'
				print self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos]
				print '>>>'
				print self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]
				print '- - - - - - -'

	def handle_object(self, object):
		"""Execute or push one object, per PostScript semantics:
		inside a procedure body, or for literal/procedure objects,
		just push; otherwise resolve names and execute."""
		if not (self.proclevel or object.literal or object.type == 'proceduretype'):
			if object.type <> 'operatortype':
				# executable name: look it up on the dictionary stack
				object = self.resolve_name(object.value)
			if object.literal:
				self.push(object)
			else:
				if object.type == 'proceduretype':
					self.call_procedure(object)
				else:
					object.function()
		else:
			self.push(object)

	def call_procedure(self, proc):
		"""Execute each object of a procedure in sequence."""
		handle_object = self.handle_object
		for item in proc.value:
			handle_object(item)

	def resolve_name(self, name):
		"""Look up a name on the dictionary stack, topmost dict first.
		Raises ps_error if the name is undefined."""
		dictstack = self.dictstack
		for i in range(len(dictstack)-1, -1, -1):
			if dictstack[i].has_key(name):
				return dictstack[i][name]
		raise ps_error, 'name error: ' + str(name)

	def do_token(self, token,
			atoi = string.atoi,
			atof = string.atof,
			ps_name = ps_name,
			ps_integer = ps_integer,
			ps_real = ps_real):
		"""Convert a plain token to a ps object: integer, real,
		radix number (base#digits) or executable name, tried in
		that order."""
		try:
			num = atoi(token)
		except (ValueError, OverflowError):
			try:
				num = atof(token)
			except (ValueError, OverflowError):
				if '#' in token:
					# PostScript radix notation, e.g. 16#FF
					hashpos = string.find(token, '#')
					try:
						base = string.atoi(token[:hashpos])
						num = string.atoi(token[hashpos+1:], base)
					except (ValueError, OverflowError):
						return ps_name(token)
					else:
						return ps_integer(num)
				else:
					return ps_name(token)
			else:
				return ps_real(num)
		else:
			return ps_integer(num)

	def do_comment(self, token):
		# Comments produce no object.
		pass

	def do_literal(self, token):
		# Strip the leading '/'.
		return ps_literal(token[1:])

	def do_string(self, token):
		# Strip the surrounding parens.
		return ps_string(token[1:-1])

	def do_hexstring(self, token):
		"""Decode a <...> hex string token to a ps_string of raw bytes.
		Embedded whitespace is ignored; an odd final digit is padded
		with '0'."""
		hexStr = string.join(string.split(token[1:-1]), '')
		if len(hexStr) % 2:
			hexStr = hexStr + '0'
		cleanstr = []
		for i in range(0, len(hexStr), 2):
			cleanstr.append(chr(string.atoi(hexStr[i:i+2], 16)))
		cleanstr = string.join(cleanstr, '')
		return ps_string(cleanstr)

	def do_special(self, token):
		"""Handle the delimiter tokens { } [ ]."""
		if token == '{':
			self.proclevel = self.proclevel + 1
			return self.procmark
		elif token == '}':
			# Collect everything down to the matching procmark into a
			# procedure (restoring original order).
			proc = []
			while 1:
				topobject = self.pop()
				if topobject == self.procmark:
					break
				proc.append(topobject)
			self.proclevel = self.proclevel - 1
			proc.reverse()
			return ps_procedure(proc)
		elif token == '[':
			return self.mark
		elif token == ']':
			# ']' executes as a name; its operator builds the array.
			return ps_name(']')
		else:
			raise ps_tokenerror, 'huh?'

	def push(self, object):
		"""Push an object onto the operand stack."""
		self.stack.append(object)

	def pop(self, *types):
		"""Pop and return the top object; if types are given, raise
		ps_error unless the object's type is one of them."""
		stack = self.stack
		if not stack:
			raise ps_error, 'stack underflow'
		object = stack[-1]
		if types:
			if object.type not in types:
				raise ps_error, 'typecheck, expected %s, found %s' % (`types`, object.type)
		del stack[-1]
		return object

	def do_makearray(self):
		"""Operator bound to ']': pop objects down to the mark and
		push them back as a single ps_array."""
		array = []
		while 1:
			topobject = self.pop()
			if topobject == self.mark:
				break
			array.append(topobject)
		array.reverse()
		self.push(ps_array(array))

	def close(self):
		"""Remove circular references."""
		del self.stack
		del self.dictstack
| |
| |
def unpack_item(item):
	"""Recursively convert a ps object (anything with .value and .type)
	to plain Python data: dicts and lists are unpacked element by
	element, procedures become tuples, everything else is returned
	as its raw value."""
	value = item.value
	kind = type(value)
	if kind == types.DictionaryType:
		unpacked = {}
		for key, subitem in value.items():
			unpacked[key] = unpack_item(subitem)
	elif kind == types.ListType:
		unpacked = []
		for subitem in value:
			unpacked.append(unpack_item(subitem))
		if item.type == 'proceduretype':
			# procedures are represented as (immutable) tuples
			unpacked = tuple(unpacked)
	else:
		unpacked = value
	return unpacked
| |
def suckfont(data):
	"""Interpret a Type 1 font program and return its font dictionary
	as plain Python data (via unpack_item).

	The font name is first sniffed from the source with a regex; if
	that name isn't found in FontDirectory after interpretation, fall
	back to the first defined font (ignoring the predefined Helvetica).
	"""
	import re
	m = re.search(r"/FontName\s+/([^ \t\n\r]+)\s+def", data)
	if m:
		fontName = m.group(1)
	else:
		fontName = None
	interpreter = PSInterpreter()
	# Predefine a dummy Helvetica so fonts referencing it don't fail.
	interpreter.interpret("/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
	interpreter.interpret(data)
	fontdir = interpreter.dictstack[0]['FontDirectory'].value
	if fontdir.has_key(fontName):
		rawfont = fontdir[fontName]
	else:
		# fall back, in case fontName wasn't found
		fontNames = fontdir.keys()
		if len(fontNames) > 1:
			# drop the dummy Helvetica defined above
			fontNames.remove("Helvetica")
		fontNames.sort()
		rawfont = fontdir[fontNames[0]]
	interpreter.close()
	return unpack_item(rawfont)
| |
| |
if __name__ == "__main__":
	# Interactive test driver: classic-Mac-only (macfs file dialog);
	# prompts for an LWFN font file and parses it.
	import macfs
	fss, ok = macfs.StandardGetFile("LWFN")
	if ok:
		import t1Lib
		data, kind = t1Lib.read(fss.as_pathname())
		font = suckfont(data)