# Source blob: ceb32a8697806c3c7f61422f0a1b72a82ca6e1a0
"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format
(e.g. from GIF image format to PPM image format).  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
""" # '

import os
import re
import string
import sys
import tempfile

__all__ = ["Template"]

# Conversion step kinds.  Each kind is a two-character string: the first
# character describes how the step takes its input, the second how it
# delivers its output ('f' = real file, '-' = stdin/stdout, '.' = none).

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT  = '-f'                   # Must write a real file
FILEIN_STDOUT  = 'f-'                   # Must read a real file
STDIN_STDOUT   = '--'                   # Normal pipeline element
SOURCE         = '.-'                   # Must be first, writes stdout
SINK           = '-.'                   # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


class Template:
    """Class representing a pipeline template.

    A template keeps an ordered list of (cmd, kind) steps in self.steps
    and a debugging flag in self.debugging.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to either template stay independent.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SOURCE, if the template
        already ends with a SINK step, or if cmd does not mention $IN /
        $OUT when kind requires an input / output file.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind ' + repr(kind))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # Raw strings are essential here: in a plain string literal '\b'
        # is a backspace character, not the regex word-boundary escape.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SINK, if the template
        already begins with a SOURCE step, or if cmd does not mention
        $IN / $OUT when kind requires an input / output file.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind ' + repr(kind))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # See append(): the patterns must be raw strings so that \b means
        # "word boundary".
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not ' + repr(rw))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading; otherwise
        the pipeline is started with os.popen() and its output end is
        returned.  Raises ValueError if the last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing; otherwise
        the pipeline is started with os.popen() and its input end is
        returned.  Raises ValueError if the first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile via os.system() and returns os.system()'s result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command text
        for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs *steps* as a pipeline.

    infile and outfile are file names, or '' for standard input /
    standard output.  steps is a sequence of (cmd, kind) pairs.  Adjacent
    steps are joined with a pipe unless one of them needs a real file, in
    which case a temporary file is created to connect them; the returned
    command removes those files when it finishes and on signals 1, 2, 3,
    13, 14 and 15.
    """
    # Build a list with for each command:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a step that needs a real
    # file but is fed from / feeds a standard stream gets a 'cat' stage
    # next to it, so the temp-file logic below can bridge the two.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        lkind = stages[i-1][2]
        rkind = stages[i][2]
        if lkind[1] == 'f' or rkind[0] == 'f':
            # mkstemp() creates the file atomically, avoiding the race
            # inherent in mktemp(); only the name is needed, so the
            # descriptor is closed right away.
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            stages[i-1][-1] = stages[i][0] = temp
    #
    # Attach file names to each command: $IN/$OUT assignments for steps
    # that want files, shell redirections for steps that use std streams.
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is piped from its
    # predecessor; a stage with one starts a new command line.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # The command carries an OUT=...; prefix, so group it to
                # make the whole thing a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files at the end and when a signal arrives
    #
    if garbage:
        rmcmd = 'rm -f'
        for name in garbage:
            rmcmd = rmcmd + ' ' + quote(name)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist


# Reliably quote a string as a single argument for /bin/sh

# ascii_letters rather than the locale-dependent string.letters, so the
# set of characters left unquoted does not vary with the locale.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
_funnychars = '"`$\\'                           # Unsafe inside "double quotes"

def quote(file):
    """Return *file* quoted so /bin/sh sees it as exactly one argument.

    Strings made only of safe characters are returned unchanged.  The
    empty string becomes '' (two single quotes) so the argument does not
    vanish from the command line.  Strings without a single quote are
    wrapped in single quotes; all others are wrapped in double quotes
    with the characters in _funnychars backslash-escaped.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    escaped = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        escaped.append(c)
    return '"' + ''.join(escaped) + '"'


# Small test program and example

def test():
    """Build a sample four-step template and run it on a fixed file.

    Debugging is switched on, so the generated shell command is printed
    before it is run.  The input path and the commands are examples and
    are not expected to exist on every system.
    """
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')