Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 1 | """Conversion pipeline templates. |
| 2 | |
| 3 | The problem: |
| 4 | ------------ |
| 5 | |
Fred Drake | 0f715d2 | 2001-07-20 18:53:34 +0000 | [diff] [blame] | 6 | Suppose you have some data that you want to convert to another format, |
| 7 | such as from GIF image format to PPM image format. Maybe the |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 8 | conversion involves several steps (e.g. piping it through compress or |
| 9 | uuencode). Some of the conversion steps may require that their input |
| 10 | is a disk file, others may be able to read standard input; similar for |
| 11 | their output. The input to the entire conversion may also be read |
| 12 | from a disk file or from an open file, and similar for its output. |
| 13 | |
| 14 | The module lets you construct a pipeline template by sticking one or |
| 15 | more conversion steps together. It will take care of creating and |
| 16 | removing temporary files if they are necessary to hold intermediate |
| 17 | data. You can then use the template to do conversions from many |
| 18 | different sources to many different destinations. The temporary |
| 19 | file names used are different each time the template is used. |
| 20 | |
| 21 | The templates are objects so you can create templates for many |
| 22 | different conversion steps and store them in a dictionary, for |
| 23 | instance. |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 24 | |
| 25 | |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 26 | Directions: |
| 27 | ----------- |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 28 | |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 29 | To create a template: |
| 30 | t = Template() |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 31 | |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 32 | To add a conversion step to a template: |
| 33 | t.append(command, kind) |
    where kind is a string of two characters: the first is '-' if the
    command reads its standard input or 'f' if it requires a file; the
    second likewise for the output.  Two special kinds also exist: '.-'
    marks a source step (it must be first and writes standard output)
    and '-.' marks a sink step (it must be last and reads standard
    input).  The command must be valid /bin/sh syntax.  If input or
    output files are required, they are passed as $IN and $OUT;
    otherwise, it must be possible to use the command in a pipeline.
| 40 | |
| 41 | To add a conversion step at the beginning: |
| 42 | t.prepend(command, kind) |
| 43 | |
| 44 | To convert a file to another file using a template: |
| 45 | sts = t.copy(infile, outfile) |
    If infile or outfile is the empty string, standard input is read or
    standard output is written, respectively.  The return value is the
    exit status of the conversion pipeline.
| 49 | |
| 50 | To open a file for reading or writing through a conversion pipeline: |
| 51 | fp = t.open(file, mode) |
| 52 | where mode is 'r' to read the file, or 'w' to write it -- just like |
| 53 | for the built-in function open() or for os.popen(). |
| 54 | |
| 55 | To create a new template object initialized to a given one: |
| 56 | t2 = t.clone() |
Skip Montanaro | 352674d | 2001-02-07 23:14:30 +0000 | [diff] [blame] | 57 | """ # ' |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 58 | |
| 59 | |
Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame] | 60 | import re |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 61 | import os |
| 62 | import tempfile |
Éric Araujo | 9bce311 | 2011-07-27 18:29:31 +0200 | [diff] [blame] | 63 | # we import the quote function rather than the module for backward compat |
| 64 | # (quote used to be an undocumented but used function in pipes) |
| 65 | from shlex import quote |
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 66 | |
# Names exported by a star-import of this module: only the Template class.
__all__ = ["Template"]
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 68 | |
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it produces its output.

FILEIN_FILEOUT = 'ff'            # Must read & write real files
STDIN_FILEOUT = '-f'             # Must write a real file
FILEIN_STDOUT = 'f-'             # Must read a real file
STDIN_STDOUT = '--'              # Normal pipeline element
SOURCE = '.-'                    # Must be first, writes stdout
SINK = '-.'                      # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 80 | |
| 81 | |
class Template:
    """Class representing a pipeline template.

    A template is an ordered list of conversion steps stored in
    self.steps; each step is a (cmd, kind) tuple where cmd is a /bin/sh
    command string and kind is one of the module-level step kinds.
    self.debugging controls whether generated pipelines are echoed and
    traced.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with the same
        steps and debugging flag as the current one."""
        t = Template()
        # Copy the list so later edits to either template stay independent.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SOURCE, if the template
        already ends with a SINK step, or if cmd lacks the $IN / $OUT
        reference that a file-based kind requires.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # A step that needs real files must refer to them through the
        # $IN / $OUT shell variables.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if cmd lacks the $IN /
        $OUT reference that a file-based kind requires.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # A step that needs real files must refer to them through the
        # $IN / $OUT shell variables.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; any other value raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the
        pipeline is started with os.popen() and its output end is
        returned.  Raises ValueError if the pipeline ends with SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the
        pipeline is started with os.popen() and its input end is
        returned.  Raises ValueError if the pipeline begins with SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value returned by os.system()."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        text for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so that the shell traces its execution.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 179 | |
| 180 | |
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile are file names; an empty string means the
    pipeline reads standard input or writes standard output.  steps is
    a sequence of (cmd, kind) pairs as stored by Template.

    Adjacent stages are connected with a shell pipe unless one of them
    needs a real file on that side, in which case a temporary file is
    created here (with tempfile.mkstemp) and the returned text includes
    a trap and a trailing 'rm -f' that remove it.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a stage that insists on a
    # real file cannot be fed from / drained to a standard stream, so a
    # plain 'cat' is put in between.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            # Only the name is needed; the shell commands open the file.
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach $IN/$OUT assignments or redirections to every command.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is piped from its
    # predecessor; one with an input file starts a new command line.
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group the variable assignment with the command so the
                # whole group is a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd

    # Remove the temporary files both on the listed signals and at the end.
    if garbage:
        rmcmd = 'rm -f' + ''.join(' ' + quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist