blob: 3aa1bf1f221f8b5c1d7ed838812a0431e43d87eb [file] [log] [blame]
"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format
(e.g. from GIF image format to PPM image format).  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
"""
Guido van Rossum2db91351992-10-18 17:09:59 +000060
61
62import sys
Guido van Rossum9694fca1997-10-22 21:00:49 +000063import re
Guido van Rossum2db91351992-10-18 17:09:59 +000064
65import os
66import tempfile
67import string
68
69
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it delivers its output.
# 'f' means a real file, '-' means a standard stream, and '.' marks the
# closed end of a SOURCE or SINK step.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
81
82
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in self.steps;
    each step is a (cmd, kind) tuple where cmd is a /bin/sh command
    string and kind is one of the two-character strings in stepkinds.
    self.debugging is a flag; when true, makepipeline() prints the
    generated command and turns on shell tracing.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to either template stay independent.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not a known step kind, if kind is SOURCE, if the
        template already ends with a SINK step, or if a kind that needs
        a file lacks the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind ' + repr(kind))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # The patterns must be raw strings: in a plain string literal
        # '\b' is a backspace character, not a regex word boundary.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if a kind that needs a
        file lacks the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind ' + repr(kind))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # Raw strings so that '\b' is a word boundary (see append()).
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; any other value raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not ' + repr(rw))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively.

        With no steps the file is opened directly with the built-in
        open(); otherwise the result comes from os.popen().  Raises
        ValueError if the pipeline ends with a SINK step.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        # The named file feeds the pipeline; its stdout is what we read.
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly with the built-in
        open(); otherwise the result comes from os.popen().  Raises
        ValueError if the pipeline begins with a SOURCE step.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        # We write to the pipeline's stdin; its output lands in the file.
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for the command."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        text for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so that the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
194
195
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile are file names, or '' to use standard input /
    standard output.  steps is a sequence of (cmd, kind) pairs, where
    kind is a two-character step kind such as 'ff' or '--'.

    Stages that cannot be joined by a pipe, because one side needs a
    real file, are connected through temporary files.  Those files are
    created here and the returned command removes them, both at the end
    and from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file but sits on a standard stream gets a 'cat' stage as adapter.
    #
    first_kind = stages[0][2]
    if first_kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    last_kind = stages[-1][2]
    if last_kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        left = stages[i - 1]
        right = stages[i]
        if left[2][1] == 'f' or right[2][0] == 'f':
            # mkstemp() creates the file atomically, so no other process
            # can claim the name first (unlike the race-prone mktemp()).
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Attach the file names to each command, either as $IN / $OUT
    # assignments or as shell redirections.
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a pipe where the stage reads standard input, a
    # new command line where it reads a file.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group the variable assignment with its command so the
                # pipe feeds the whole group.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files when done and on the listed signals.
    #
    if garbage:
        rmcmd = 'rm -f'
        for name in garbage:
            rmcmd = rmcmd + ' ' + quote(name)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
262
263
# Reliably quote a string as a single argument for /bin/sh

# ascii_letters rather than letters: the set of characters that is safe
# without quoting must not vary with the locale.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
_funnychars = '"`$\\' # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so that /bin/sh reads it as one argument.

    A string made only of safe characters is returned unchanged.  A
    string without single quotes is wrapped in single quotes.  Any
    other string is wrapped in double quotes, with a backslash put
    before each character that stays special inside double quotes.
    The empty string becomes '' (two single quotes) so that it
    survives as an argument instead of vanishing from the command.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        # Nothing needs protection.
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    # The string contains a single quote, so single quoting is out.
    pieces = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        pieces.append(c)
    return '"' + ''.join(pieces) + '"'
283
284
# Small test program and example

def test():
    """Build a four-step template and run it on a sample image file.

    Debugging is switched on, so the generated shell command is printed
    before it is run.
    """
    print('Testing...')
    pipeline = Template()
    demo_steps = [
        ('togif $IN $OUT', 'ff'),
        ('giftoppm', '--'),
        ('ppmtogif >$OUT', '-f'),
        ('fromgif $IN $OUT', 'ff'),
    ]
    for command, kind in demo_steps:
        pipeline.append(command, kind)
    pipeline.debug(1)
    source_path = '/usr/local/images/rgb/rogues/guido.rgb'
    pipeline.copy(source_path, '@temp')
    print('Done.')