blob: 2bb6ee317ab19f5ecb7ed57132e9701b183887c7 [file] [log] [blame]
# Conversion pipeline templates
# =============================
3
4
# The problem:
# ------------
#
# Suppose you have some data that you want to convert to another format
# (e.g. from GIF image format to PPM image format).  Maybe the
# conversion involves several steps (e.g. piping it through compress or
# uuencode).  Some of the conversion steps may require that their input
# is a disk file, others may be able to read standard input; similar for
# their output.  The input to the entire conversion may also be read
# from a disk file or from an open file, and similar for its output.
#
# The module lets you construct a pipeline template by sticking one or
# more conversion steps together.  It will take care of creating and
# removing temporary files if they are necessary to hold intermediate
# data.  You can then use the template to do conversions from many
# different sources to many different destinations.  The temporary
# file names used are different each time the template is used.
#
# The templates are objects so you can create templates for many
# different conversion steps and store them in a dictionary, for
# instance.
26
27
# Directions:
# -----------
#
# To create a template:
#     t = Template()
#
# To add a conversion step to a template:
#     t.append(command, kind)
# where kind is a string of two characters: the first is '-' if the
# command reads its standard input or 'f' if it requires a file; the
# second likewise for the output.  The command must be valid /bin/sh
# syntax.  If input or output files are required, they are passed as
# $IN and $OUT; otherwise, it must be possible to use the command in
# a pipeline.
#
# To add a conversion step at the beginning:
#     t.prepend(command, kind)
#
# To convert a file to another file using a template:
#     sts = t.copy(infile, outfile)
# If infile or outfile are the empty string, standard input is read or
# standard output is written, respectively.  The return value is the
# exit status of the conversion pipeline.
#
# To open a file for reading or writing through a conversion pipeline:
#     fp = t.open(file, mode)
# where mode is 'r' to read the file, or 'w' to write it -- just like
# for the built-in function open() or for os.popen().
#
# To create a new template object initialized to a given one:
#     t2 = t.clone()
#
# For an example, see the function test() at the end of the file.
61
62
63import sys
Guido van Rossum9694fca1997-10-22 21:00:49 +000064import re
Guido van Rossum2db91351992-10-18 17:09:59 +000065
66import os
67import tempfile
68import string
69
70
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it delivers its output.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


# A pipeline template is a Template object:

class Template:
    """An ordered list of shell conversion steps that can be run as a pipeline.

    Attributes:
        steps: list of (cmd, kind) tuples, in execution order.
        debugging: when true, generated commands are printed and run
            under 'set -x'.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        t.steps = self.steps[:]     # copy, so the clone can diverge
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_cmd_and_kind(self, where, cmd, kind):
        """Raise TypeError/ValueError unless cmd is a string and kind is known."""
        if not isinstance(cmd, str):
            raise TypeError(where + ': cmd must be a string')
        if kind not in stepkinds:
            raise ValueError(where + ': bad kind ' + repr(kind))

    def _check_placeholders(self, where, cmd, kind):
        """Raise ValueError if a file-based step lacks its $IN/$OUT variable."""
        # Raw strings are essential here: in a plain string literal '\b' is
        # a backspace character, not the regex word-boundary assertion.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError(where + ': missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError(where + ': missing $OUT in cmd')

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SOURCE, the template already ends with a SINK, or
        cmd lacks a $IN/$OUT reference that kind requires.
        """
        where = 'Template.append'
        self._check_cmd_and_kind(where, cmd, kind)
        if kind == SOURCE:
            raise ValueError(where + ': SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError(where + ': already ends with SINK')
        self._check_placeholders(where, cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SINK, the template already begins with a SOURCE,
        or cmd lacks a $IN/$OUT reference that kind requires.
        """
        where = 'Template.prepend'
        self._check_cmd_and_kind(where, cmd, kind)
        if kind == SINK:
            raise ValueError(where + ': SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError(where + ': already begins with SOURCE')
        self._check_placeholders(where, cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not '
                         + repr(rw))

    # t.open_r(file) and t.open_w(file) implement
    # t.open(file, 'r') and t.open(file, 'w') respectively

    def open_r(self, file):
        """Return an object for reading file's contents through the pipeline.

        With no steps the file is simply opened; otherwise a pipe from
        os.popen() is returned.  Raises ValueError if the last step is a
        SINK, since then there is no output to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """Return an object whose written data goes through the pipeline to file.

        With no steps the file is simply opened; otherwise a pipe from
        os.popen() is returned.  Raises ValueError if the first step is
        a SOURCE, since then the pipeline takes no input.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """Run the pipeline from infile to outfile; return os.system()'s result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """Return the shell command for this template's steps.

        In debugging mode the command is also printed and prefixed with
        'set -x; ' so the shell traces it.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
197
198
def makepipeline(infile, steps, outfile):
    """Return /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names; an empty string leaves that end
    of the pipeline unredirected.  steps is a sequence of (cmd, kind)
    pairs, where kind is a two-character step kind.

    Stages that must exchange data through a real file are connected by
    temporary files.  Those files are created here (securely, via
    tempfile.mkstemp) and the returned command removes them when it
    finishes or when one of the trapped signals arrives.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a step that needs a real
    # file but has none to use gets a 'cat' stage next to it, so the
    # temporary-file logic below supplies the file.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        left = stages[i - 1]
        right = stages[i]
        if left[2][1] == 'f' or right[2][0] == 'f':
            # mkstemp creates the file atomically, avoiding the
            # name-guessing race of the deprecated mktemp(); only the
            # name is needed here, so the descriptor is closed at once.
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Decorate each command with its $IN/$OUT assignments or redirections
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is fed through a
    # pipe; a stage with one starts on a new line, i.e. runs after the
    # previous stage has finished writing that file.
    #
    cmdlist = stages[0][1]
    for inf, cmd, kind, outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group so the variable assignment and the command form
                # a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files at the end and on signals 1 2 3 13 14 15
    #
    if garbage:
        rmcmd = 'rm -f' + ''.join([' ' + quote(temp) for temp in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
265
266
# Reliably quote a string as a single argument for /bin/sh

# ascii_letters rather than the locale-dependent string.letters: only
# plain ASCII letters are treated as safe to pass unquoted.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'  # Safe unquoted
_funnychars = '"`$\\'   # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so /bin/sh reads it as exactly one argument.

    Strings made only of safe characters are returned unchanged.  The
    empty string becomes '' (two single quotes) so the argument does
    not vanish.  Other strings are wrapped in single quotes, or, when
    they contain a single quote themselves, in double quotes with the
    characters that stay special there backslash-escaped.
    """
    if not file:
        # An unquoted empty string would disappear from the command line.
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    escaped = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        escaped.append(c)
    return '"' + ''.join(escaped) + '"'
286
287
# Small test program and example

def test():
    """Build a four-step example template and run it with debugging on.

    The command names and the input path are examples only; running
    this meaningfully requires those programs and that file to exist.
    """
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)      # print the generated command and trace it in the shell
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')