| """Conversion pipeline templates. |
| |
| The problem: |
| ------------ |
| |
| Suppose you have some data that you want to convert to another format, |
| such as from GIF image format to PPM image format. Maybe the |
| conversion involves several steps (e.g. piping it through compress or |
| uuencode). Some of the conversion steps may require that their input |
| is a disk file, others may be able to read standard input; similar for |
| their output. The input to the entire conversion may also be read |
| from a disk file or from an open file, and similar for its output. |
| |
| The module lets you construct a pipeline template by sticking one or |
| more conversion steps together. It will take care of creating and |
| removing temporary files if they are necessary to hold intermediate |
| data. You can then use the template to do conversions from many |
| different sources to many different destinations. The temporary |
| file names used are different each time the template is used. |
| |
| The templates are objects so you can create templates for many |
| different conversion steps and store them in a dictionary, for |
| instance. |
| |
| |
| Directions: |
| ----------- |
| |
| To create a template: |
| t = Template() |
| |
| To add a conversion step to a template: |
| t.append(command, kind) |
| where kind is a string of two characters: the first is '-' if the |
| command reads its standard input or 'f' if it requires a file; the |
| second likewise for the output. The command must be valid /bin/sh |
| syntax. If input or output files are required, they are passed as |
| $IN and $OUT; otherwise, it must be possible to use the command in |
| a pipeline. |
| |
| To add a conversion step at the beginning: |
| t.prepend(command, kind) |
| |
| To convert a file to another file using a template: |
| sts = t.copy(infile, outfile) |
| If infile or outfile are the empty string, standard input is read or |
| standard output is written, respectively. The return value is the |
| exit status of the conversion pipeline. |
| |
| To open a file for reading or writing through a conversion pipeline: |
| fp = t.open(file, mode) |
| where mode is 'r' to read the file, or 'w' to write it -- just like |
| for the built-in function open() or for os.popen(). |
| |
| To create a new template object initialized to a given one: |
| t2 = t.clone() |
| |
| For an example, see the function test() at the end of the file. |
| """ # ' |
| |
| |
| import re |
| import os |
| import tempfile |
| import string |
| |
| __all__ = ["Template"] |
| |
| # Conversion step kinds |
| |
# Each kind is two characters: how the step takes its input, then how it
# delivers its output ('f' = real file, '-' = stdin/stdout, '.' = none).
FILEIN_FILEOUT = 'ff'   # reads a real file, writes a real file
STDIN_FILEOUT = '-f'    # reads stdin, writes a real file
FILEIN_STDOUT = 'f-'    # reads a real file, writes stdout
STDIN_STDOUT = '--'     # ordinary pipeline element
SOURCE = '.-'           # only valid as the first step; writes stdout
SINK = '-.'             # only valid as the last step; reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
| |
| |
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in self.steps;
    each step is a (cmd, kind) tuple where cmd is a /bin/sh command
    string and kind is one of the module-level step kinds.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later append/prepend calls on either template
        # do not affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, is SOURCE, would follow a SINK step,
        or requires $IN / $OUT that cmd does not mention.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, is SINK, would precede a SOURCE step,
        or requires $IN / $OUT that cmd does not mention.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the
        pipeline reads from file and its output is returned as a pipe.
        Raises ValueError if the pipeline ends with a SINK step, since
        there would be no output to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise a pipe is
        returned whose data is fed through the pipeline into file.
        Raises ValueError if the pipeline begins with a SOURCE step,
        since it would not read the data written to the pipe.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile with os.system() and returns what os.system() returns."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        text for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces each step as it runs.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
| |
| |
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names; an empty string leaves that end
    of the pipeline connected to standard input / standard output.
    steps is a sequence of (cmd, kind) pairs, where kind is a
    two-character step kind whose first character describes the input
    and whose second describes the output ('f' means a real file).

    Stages that can be piped are joined with '|'; wherever a stage
    needs a real file, a temporary file is created with
    tempfile.mkstemp() and the script removes it when it finishes or is
    interrupted by one of the trapped signals.
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # An empty template still has to move the data: make it a plain copy.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # A first stage that insists on an input file cannot read stdin
    # directly; put a 'cat' in front so a temporary file is made for it.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    # Likewise for a last stage that insists on writing a file.
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect neighbouring stages where either
    # side needs a real file.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            # Only the name is needed; the shell commands do the I/O.
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach $IN/$OUT assignments or redirections to every command.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is fed by a pipe,
    # a stage with one starts a new command line.
    cmdlist = stages[0][1]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group 'OUT=...; cmd' so the pipe feeds the command and
                # not just the variable assignment.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd

    if garbage:
        rmcmd = 'rm -f' + ''.join(' ' + quote(temp) for temp in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        # Remember the conversion's exit status before cleaning up and
        # hand it back afterwards; otherwise the script would always
        # report the status of 'rm -f' instead of the pipeline's.
        cmdlist = (trapcmd + '\n' + cmdlist + '\nsts=$?\n' + rmcmd
                   + '\n(exit $sts)')

    return cmdlist
| |
| |
# Quote a string so that /bin/sh sees it as exactly one argument

# Characters that are harmless to the shell without any quoting
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')


def quote(file):
    """Return file escaped so the shell treats it as one literal word."""
    if all(ch in _safechars for ch in file):
        # Nothing needs protecting; an empty value must still be
        # spelled '' or the argument would disappear altogether.
        return file if file else "''"
    # Wrap in single quotes. An embedded single quote ends the quoted
    # run, is emitted inside double quotes, and reopens the run, so
    # the string $'b comes out as '$'"'"'b'
    return "'" + file.replace("'", "'\"'\"'") + "'"