# Conversion pipeline templates
# =============================


# The problem:
# ------------
#
# Suppose you have some data that you want to convert to another format
# (e.g. from GIF image format to PPM image format).  Maybe the
# conversion involves several steps (e.g. piping it through compress or
# uuencode).  Some of the conversion steps may require that their input
# is a disk file, others may be able to read standard input; similarly
# for their output.  The input to the entire conversion may also be read
# from a disk file or from an open file, and similarly for its output.
#
# The module lets you construct a pipeline template by sticking one or
# more conversion steps together.  It will take care of creating and
# removing temporary files if they are necessary to hold intermediate
# data.  You can then use the template to do conversions from many
# different sources to many different destinations.  The temporary
# file names used are different each time the template is used.
#
# The templates are objects so you can create templates for many
# different conversion steps and store them in a dictionary, for
# instance.


# Directions:
# -----------
#
# To create a template:
#   t = Template()
#
# To add a conversion step to a template:
#   t.append(command, kind)
# where kind is a string of two characters: the first is '-' if the
# command reads its standard input or 'f' if it requires a file; the
# second likewise for the output.  The command must be valid /bin/sh
# syntax.  If input or output files are required, they are passed as
# $IN and $OUT; otherwise, it must be possible to use the command in
# a pipeline.
#
# To add a conversion step at the beginning:
#   t.prepend(command, kind)
#
# To convert a file to another file using a template:
#   sts = t.copy(infile, outfile)
# If infile or outfile is the empty string, standard input is read or
# standard output is written, respectively.  The return value is the
# exit status of the conversion pipeline.
#
# To open a file for reading or writing through a conversion pipeline:
#   fp = t.open(file, mode)
# where mode is 'r' to read the file, or 'w' to write it -- just like
# for the built-in function open() or for os.popen().
#
# To create a new template object initialized to a given one:
#   t2 = t.clone()
#
# For an example, see the function test() at the end of the file.


import os
import re
import regex
import string
import sys
import tempfile


# Conversion step kinds
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it produces its output.

# Must read & write real files
FILEIN_FILEOUT = 'ff'
# Must write a real file
STDIN_FILEOUT = '-f'
# Must read a real file
FILEIN_STDOUT = 'f-'
# Normal pipeline element
STDIN_STDOUT = '--'
# Must be first, writes stdout
SOURCE = '.-'
# Must be last, reads stdin
SINK = '-.'

# Every kind accepted by Template.append() / Template.prepend()
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]


# A pipeline template is a Template object:

class Template:
    """Reusable description of a /bin/sh conversion pipeline.

    A template stores an ordered list of (cmd, kind) steps in
    self.steps and a debugging flag in self.debugging.  Steps are added
    with append() and prepend(); the template is then turned into a
    shell command and run by copy(), open_r() or open_w().
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one.

        The step list is copied, so adding steps to the clone does not
        affect the original.
        """
        t = Template()
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_step(self, where, cmd, kind):
        # Validation shared by append() and prepend(): cmd must be a
        # string and kind must be one of the known step kinds.  'where'
        # is the prefix used in the error message.
        if not isinstance(cmd, str):
            raise TypeError(where + ': cmd must be a string')
        if kind not in stepkinds:
            raise ValueError(where + ': bad kind ' + repr(kind))

    def _check_placeholders(self, where, cmd, kind):
        # A step that needs a real input (output) file must mention $IN
        # ($OUT) as a whole word, so that e.g. $INPUT does not count.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError(where + ': missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError(where + ': missing $OUT in cmd')

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown or is SOURCE, if the template already ends with a
        SINK step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        where = 'Template.append'
        self._check_step(where, cmd, kind)
        if kind == SOURCE:
            raise ValueError(where + ': SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError(where + ': already ends with SINK')
        self._check_placeholders(where, cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown or is SINK, if the template already begins with a
        SOURCE step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        where = 'Template.prepend'
        self._check_step(where, cmd, kind)
        if kind == SINK:
            raise ValueError(where + ': SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError(where + ': already begins with SOURCE')
        self._check_placeholders(where, cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not ' + repr(rw))

    # t.open_r(file) and t.open_w(file) implement
    # t.open(file, 'r') and t.open(file, 'w') respectively

    def open_r(self, file):
        """Open file for reading through the pipeline.

        With no steps the file is opened directly.  Raises ValueError
        if the pipeline ends with a SINK step.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """Open file for writing through the pipeline.

        With no steps the file is opened directly.  Raises ValueError
        if the pipeline begins with a SOURCE step.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """Run the pipeline from infile to outfile via os.system() and
        return its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """Return the shell command for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; '.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return a /bin/sh command string that runs steps from infile to outfile.

    infile and outfile are file names; an empty value means the pipeline
    reads standard input or writes standard output.  steps is a sequence
    of (cmd, kind) pairs, where kind is a two-character step kind.

    Wherever two adjacent steps must be connected through a real file, a
    temporary file is created here with tempfile.mkstemp() (so its name
    cannot be claimed by someone else before the command runs).  The
    returned command removes those files with 'rm -f' when it finishes,
    and installs a trap that does the same on signals 1, 2, 3, 13, 14
    and 15.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a step that needs a real
    # file but would be fed from stdin (or write to stdout) gets a 'cat'
    # stage in front of (or behind) it.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Create temporary files to connect stages that need files
    #
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)        # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Attach variable assignments and redirections to each command
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is fed by a pipe
    # from the previous one; otherwise it starts a new command line.
    #
    cmdlist = stages[0][1]
    for inf, cmd, kind, outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # The command carries an 'OUT=...;' prefix, so group it
                # to make the pipe feed the whole thing.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Clean up the temporary files, also when interrupted
    #
    if garbage:
        rmcmd = 'rm -f ' + ' '.join([quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist


# Reliably quote a string as a single argument for /bin/sh

# Characters that are safe without any quoting.  ASCII letters only:
# string.letters depends on the locale and is gone in newer Pythons.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
# Characters that must be backslash-escaped inside "double quotes"
_funnychars = '"`$\\'

def quote(file):
    """Return file quoted so that /bin/sh sees it as a single argument.

    - The empty string becomes '' (two single quotes); returned as-is
      it would vanish from the command line.
    - A string made only of safe characters is returned unchanged.
    - A string without a single quote is wrapped in single quotes.
    - Anything else is wrapped in double quotes, with the characters
      in _funnychars escaped by a backslash.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    pieces = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        pieces.append(c)
    return '"' + ''.join(pieces) + '"'


# Small test program and example

def test():
    """Build a four-step example template and run it on a sample file.

    Debugging is switched on, so the generated shell command is printed
    before it is executed.  The commands and the input path are only
    examples; the result of t.copy() is ignored.
    """
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')