| # Conversion pipeline templates |
| # ============================= |
| |
| |
| # The problem: |
| # ------------ |
| # |
| # Suppose you have some data that you want to convert to another format |
| # (e.g. from GIF image format to PPM image format). Maybe the |
| # conversion involves several steps (e.g. piping it through compress or |
| # uuencode). Some of the conversion steps may require that their input |
| # is a disk file, others may be able to read standard input; similar for |
| # their output. The input to the entire conversion may also be read |
| # from a disk file or from an open file, and similar for its output. |
| # |
| # The module lets you construct a pipeline template by sticking one or |
| # more conversion steps together. It will take care of creating and |
| # removing temporary files if they are necessary to hold intermediate |
| # data. You can then use the template to do conversions from many |
| # different sources to many different destinations. The temporary |
| # file names used are different each time the template is used. |
| # |
| # The templates are objects so you can create templates for many |
| # different conversion steps and store them in a dictionary, for |
| # instance. |
| |
| |
| # Directions: |
| # ----------- |
| # |
| # To create a template: |
| # t = Template() |
| # |
| # To add a conversion step to a template: |
| # t.append(command, kind) |
| # where kind is a string of two characters: the first is '-' if the |
| # command reads its standard input or 'f' if it requires a file; the |
| # second likewise for the output. The command must be valid /bin/sh |
| # syntax. If input or output files are required, they are passed as |
| # $IN and $OUT; otherwise, it must be possible to use the command in |
| # a pipeline. |
| # |
| # To add a conversion step at the beginning: |
| # t.prepend(command, kind) |
| # |
| # To convert a file to another file using a template: |
| # sts = t.copy(infile, outfile) |
| # If infile or outfile are the empty string, standard input is read or |
| # standard output is written, respectively. The return value is the |
| # exit status of the conversion pipeline. |
| # |
| # To open a file for reading or writing through a conversion pipeline: |
| # fp = t.open(file, mode) |
| # where mode is 'r' to read the file, or 'w' to write it -- just like |
| # for the built-in function open() or for os.popen(). |
| # |
| # To create a new template object initialized to a given one: |
| # t2 = t.clone() |
| # |
| # For an example, see the function test() at the end of the file. |
| |
| |
import os
import re
import regex
import string
import sys
import tempfile
| |
| |
| # Conversion step kinds |
| |
# Each kind is a two-character code: first character describes the input
# side of the step, second character the output side.
FILEIN_FILEOUT = 'ff'  # Must read & write real files
STDIN_FILEOUT = '-f'   # Must write a real file
FILEIN_STDOUT = 'f-'   # Must read a real file
STDIN_STDOUT = '--'    # Normal pipeline element
SOURCE = '.-'          # Must be first, writes stdout
SINK = '-.'            # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
| |
| |
| # A pipeline template is a Template object: |
| |
class Template:
    """A reusable conversion pipeline: an ordered list of shell steps.

    ``self.steps`` holds ``(cmd, kind)`` tuples, where ``cmd`` is a /bin/sh
    command and ``kind`` is one of the two-character codes in ``stepkinds``.
    ``self.debugging`` is a flag; when true the generated command is printed
    and run under ``set -x``.
    """

    def __init__(self):
        """Create an empty template with debugging switched off."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """Return a short description that includes the current steps."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """Restore the template to its initial state (no steps)."""
        self.steps = []

    def clone(self):
        """Return a new Template with a copy of the steps and debug flag."""
        t = Template()
        # Copy the list so later edits to one template do not leak into
        # the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """Turn debugging on (true *flag*) or off (false *flag*)."""
        self.debugging = flag

    def _check_cmd_kind(self, caller, cmd, kind):
        """Raise TypeError/ValueError unless cmd is a string and kind is known."""
        if not isinstance(cmd, str):
            raise TypeError('Template.' + caller + ': cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.' + caller + ': bad kind ' + repr(kind))

    def _check_placeholders(self, caller, cmd, kind):
        """Raise ValueError if a file-based side of kind lacks $IN / $OUT."""
        # \b keeps look-alike variables such as $INPUT or $OUTPUT from
        # being mistaken for the $IN / $OUT placeholders.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.' + caller + ': missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.' + caller + ': missing $OUT in cmd')

    def append(self, cmd, kind):
        """Add a new step at the end of the pipeline.

        Raises TypeError if *cmd* is not a string, and ValueError if *kind*
        is unknown, is SOURCE, the pipeline already ends with a SINK, or a
        file-based side of *kind* has no matching $IN / $OUT in *cmd*.
        """
        self._check_cmd_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """Add a new step at the front of the pipeline.

        Raises TypeError if *cmd* is not a string, and ValueError if *kind*
        is unknown, is SINK, the pipeline already begins with a SOURCE, or a
        file-based side of *kind* has no matching $IN / $OUT in *cmd*.
        """
        self._check_cmd_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """Open *file* through the pipeline for reading ('r') or writing ('w').

        Returns whatever open_r() / open_w() return; raises ValueError for
        any other *rw* value.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not ' + repr(rw))

    def open_r(self, file):
        """Return an object for reading *file* through the pipeline.

        With no steps this is a plain ``open(file, 'r')``; otherwise it is
        an ``os.popen`` pipe on the generated command.  Raises ValueError if
        the pipeline ends with a SINK step.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """Return an object for writing *file* through the pipeline.

        With no steps this is a plain ``open(file, 'w')``; otherwise it is
        an ``os.popen`` pipe on the generated command.  Raises ValueError if
        the pipeline begins with a SOURCE step.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """Run the pipeline from *infile* to *outfile*; return os.system's status."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """Return the shell command for this template's steps.

        Delegates to the module-level makepipeline().  When debugging is on
        the command is printed and prefixed with ``set -x`` so the shell
        traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
| |
| |
def makepipeline(infile, steps, outfile):
    """Return a /bin/sh command string that runs *steps* as one conversion.

    *steps* is a sequence of ``(cmd, kind)`` tuples.  *infile* / *outfile*
    name the overall input and output; an empty string means the command
    uses its standard input / standard output.  Steps that need real files
    are connected through temporary files, which the generated command
    removes at its end and from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a file-only step at either
    # end gets a 'cat' neighbour when the caller wants a standard stream.
    #
    first_kind = stages[0][2]
    if first_kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    last_kind = stages[-1][2]
    if last_kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        lkind = stages[i-1][2]
        rkind = stages[i][2]
        if lkind[1] == 'f' or rkind[0] == 'f':
            # mkstemp creates the file atomically, so the name cannot be
            # claimed by someone else before the shell uses it.  The file
            # exists from now on; the generated command removes it, so it
            # stays behind if that command is never run.
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            stages[i-1][-1] = stages[i][0] = temp
    #
    # Attach the file names to each command: $IN/$OUT assignments for
    # file-based sides, shell redirections for stream-based sides.
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is fed by a pipe from
    # the previous one; a stage with an input file starts a new command.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group the variable assignment and the command so the
                # pipe feeds the command itself.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporaries both on normal completion and on signals.
    # NOTE(review): with temporaries the last command is 'rm -f', so the
    # shell's exit status presumably reflects rm rather than the
    # conversion itself -- confirm before relying on it.
    #
    if garbage:
        rmcmd = 'rm -f'
        for temp in garbage:
            rmcmd = rmcmd + ' ' + quote(temp)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
| |
| |
| # Reliably quote a string as a single argument for /bin/sh |
| |
# Characters that need no quoting at all.  ascii_letters is used because
# string.letters is locale-dependent and missing on newer interpreters.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
# Characters that stay special inside "double quotes" and need a backslash.
_funnychars = '"`$\\'


def quote(file):
    """Return *file* quoted so that /bin/sh reads it as exactly one argument.

    A non-empty string made only of safe characters is returned unchanged.
    A string without single quotes is wrapped in single quotes.  Anything
    else is wrapped in double quotes, with a backslash placed before each
    character that remains special there.  The empty string becomes ``''``
    so that the argument does not disappear from the command line.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    parts = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        parts.append(c)
    return '"' + ''.join(parts) + '"'
| |
| |
| # Small test program and example |
| |
def test():
    """Small example: build a four-step template and run it once.

    The steps mix file-based and stream-based kinds.  Debugging is switched
    on, so the generated shell command is printed before it runs.  The
    conversion tools and the input path are examples only; the run does
    real work only on a machine that has them.
    """
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')