blob: 426c3774722f84eaf0b6aa78ad8daf123f30703f [file] [log] [blame]
Guido van Rossum2db91351992-10-18 17:09:59 +00001# Conversion pipeline templates
2# =============================
3
4
5# The problem:
6# ------------
7#
8# Suppose you have some data that you want to convert to another format
9# (e.g. from GIF image format to PPM image format). Maybe the
10# conversion involves several steps (e.g. piping it through compress or
11# uuencode). Some of the conversion steps may require that their input
12# is a disk file, others may be able to read standard input; similar for
13# their output. The input to the entire conversion may also be read
14# from a disk file or from an open file, and similar for its output.
15#
16# The module lets you construct a pipeline template by sticking one or
17# more conversion steps together. It will take care of creating and
18# removing temporary files if they are necessary to hold intermediate
19# data. You can then use the template to do conversions from many
20# different sources to many different destinations. The temporary
21# file names used are different each time the template is used.
22#
23# The templates are objects so you can create templates for many
24# different conversion steps and store them in a dictionary, for
25# instance.
26
27
28# Directions:
29# -----------
30#
31# To create a template:
32# t = Template().init()
33#
34# To add a conversion step to a template:
35# t.append(command, kind)
36# where kind is a string of two characters: the first is '-' if the
37# command reads its standard input or 'f' if it requires a file; the
38# second likewise for the output. The command must be valid /bin/sh
39# syntax. If input or output files are required, they are passed as
40# $IN and $OUT; otherwise, it must be possible to use the command in
41# a pipeline.
42#
43# To add a conversion step at the beginning:
44# t.prepend(command, kind)
45#
46# To convert a file to another file using a template:
47# sts = t.copy(infile, outfile)
48# If infile or outfile are the empty string, standard input is read or
49# standard output is written, respectively. The return value is the
50# exit status of the conversion pipeline.
51#
52# To open a file for reading or writing through a conversion pipeline:
53# fp = t.open(file, mode)
54# where mode is 'r' to read the file, or 'w' to write it -- just like
55# for the built-in function open() or for os.popen().
56#
57# To create a new template object initialized to a given one:
58# t2 = t.clone()
59#
60# For an example, see the function test() at the end of the file.
61
62
import os
import re
import regex
import string
import sys
import tempfile
69
70
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes
# how the step takes its input, the second how it delivers its output.

FILEIN_FILEOUT = 'ff'   # input and output are both real files
STDIN_FILEOUT = '-f'    # reads stdin, output is a real file
FILEIN_STDOUT = 'f-'    # input is a real file, writes stdout
STDIN_STDOUT = '--'     # ordinary filter: stdin to stdout
SOURCE = '.-'           # only valid as first step; writes stdout
SINK = '-.'             # only valid as last step; reads stdin

# Every kind accepted by Template.append() and Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
82
83
84# A pipeline template is a Template object:
85
class Template:

    # A Template holds an ordered list of (cmd, kind) conversion steps
    # in self.steps and a debugging flag in self.debugging.  Steps are
    # added with append() and prepend(); copy() and open() turn the
    # steps into a /bin/sh command and run it.

    # Template() now yields a usable, empty template by itself, so
    # that an instance never lacks its 'steps' and 'debugging'
    # attributes.
    def __init__(self):
        self.debugging = 0
        self.reset()

    # Template().init() returns a fresh pipeline template.  Kept so
    # that the documented Template().init() spelling keeps working.
    def init(self):
        self.debugging = 0
        self.reset()
        return self

    # t.__repr__() implements repr(t)
    def __repr__(self):
        return '<Template instance, steps=' + repr(self.steps) + '>'

    # t.reset() restores a pipeline template to its initial state
    # (no steps); the debugging flag is left alone.
    def reset(self):
        self.steps = []

    # t.clone() returns a new pipeline template with identical
    # initial state as the current one.  The step list is copied, so
    # adding steps to the clone does not affect the original.
    def clone(self):
        t = Template()
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    # t.debug(flag) turns debugging on or off
    def debug(self, flag):
        self.debugging = flag

    # Validation shared by append() and prepend(): cmd must be a
    # string and kind one of the known step kinds.  'where' is the
    # public method name, used only to build the error message.
    # Raises TypeError or ValueError.
    def _check_step(self, where, cmd, kind):
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %s'
                             % (where, repr(kind)))

    # Validation shared by append() and prepend(): a step that needs
    # a real input (output) file must mention $IN ($OUT) in its
    # command.  The \b keeps longer names such as $INPUT or $OUTPUT
    # from being mistaken for the variable.  Raises ValueError.
    def _check_vars(self, where, cmd, kind):
        if kind[0] == 'f' and re.search(r'\$IN\b', cmd) is None:
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and re.search(r'\$OUT\b', cmd) is None:
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    # t.append(cmd, kind) adds a new step at the end.
    # Raises TypeError if cmd is not a string; raises ValueError for
    # an unknown kind, for a SOURCE step, when the template already
    # ends with a SINK, or when a required $IN/$OUT is missing.
    def append(self, cmd, kind):
        self._check_step('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_vars('append', cmd, kind)
        self.steps.append((cmd, kind))

    # t.prepend(cmd, kind) adds a new step at the front.
    # Raises TypeError if cmd is not a string; raises ValueError for
    # an unknown kind, for a SINK step, when the template already
    # begins with a SOURCE, or when a required $IN/$OUT is missing.
    def prepend(self, cmd, kind):
        self._check_step('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_vars('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    # t.open(file, rw) returns a pipe or file object open for
    # reading or writing; the file is the other end of the pipeline.
    # Raises ValueError if rw is neither 'r' nor 'w'.
    def open(self, file, rw):
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not '
                         + repr(rw))

    # t.open_r(file) and t.open_w(file) implement
    # t.open(file, 'r') and t.open(file, 'w') respectively

    # With no steps the file is simply opened directly.  Otherwise
    # the pipeline reads 'file' and the caller reads the pipeline's
    # standard output, which is impossible if the last step is a SINK.
    def open_r(self, file):
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    # With no steps the file is simply opened directly.  Otherwise
    # the caller writes to the pipeline's standard input and the
    # pipeline writes 'file', which is impossible if the first step
    # is a SOURCE.
    def open_w(self, file):
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    # t.copy(infile, outfile) runs the pipeline from infile to
    # outfile and returns whatever os.system() returns for it.
    def copy(self, infile, outfile):
        return os.system(self.makepipeline(infile, outfile))

    # t.makepipeline(infile, outfile) returns the shell command for
    # this template.  When debugging is on, the command is printed
    # and prefixed with 'set -x' so the shell traces it as well.
    def makepipeline(self, infile, outfile):
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
198
199
def makepipeline(infile, steps, outfile):
    # Return the /bin/sh command text that runs the (cmd, kind) pairs
    # in 'steps' as one pipeline from infile to outfile.  An empty
    # infile or outfile adds no redirection at that end, so the
    # pipeline uses the standard input or output it is started with.
    #
    # Temporary files needed between stages are created here, and the
    # returned command removes them both at its end and on the
    # signals listed in the trap.
    #
    # Build a list with for each command:
    # [input filename or '', command string, kind, output filename or '']

    stages = []
    for cmd, kind in steps:
        stages.append(['', cmd, kind, ''])
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a step that needs a
    # real file but has none gets a 'cat' stage next to it, so that
    # the temporary-file pass below supplies the file.
    #
    kind = stages[0][2]
    if kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    kind = stages[-1][2]
    if kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        lkind = stages[i-1][2]
        rkind = stages[i][2]
        if lkind[1] == 'f' or rkind[0] == 'f':
            # mkstemp() creates the file itself, so the name cannot be
            # claimed by someone else before the shell uses it; only
            # the name is needed here, so the descriptor is closed.
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            stages[i-1][-1] = stages[i][0] = temp
    #
    # Give each command its $IN/$OUT settings or its redirections
    #
    for item in stages:
        inf, cmd, kind, outf = item
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        item[1] = cmd
    #
    # Join the stages: a stage without an input file is fed through a
    # pipe, a stage with one starts a new command line.
    #
    cmdlist = stages[0][1]
    for item in stages[1:]:
        cmd, kind = item[1:3]
        if item[0] == '':
            if 'f' in kind:
                # Group the variable assignment and the command so
                # that the pipe feeds the command.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files afterwards, and on a signal
    #
    if garbage:
        rmcmd = 'rm -f ' + ' '.join([quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
266
267
# Reliably quote a string as a single argument for /bin/sh

# ascii_letters rather than letters: the set of characters that are
# safe without quoting must not vary with the locale.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'  # Safe unquoted
_funnychars = '"`$\\'  # Unsafe inside "double quotes"

def quote(file):
    # Return 'file' in a form that /bin/sh reads back as exactly one
    # word with the same characters:
    # - the empty string becomes '' (left bare it would vanish from
    #   the command line);
    # - a string of safe characters only is returned unchanged;
    # - a string without single quotes is wrapped in single quotes;
    # - anything else is wrapped in double quotes, with the characters
    #   in _funnychars backslash-escaped.
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    pieces = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        pieces.append(c)
    return '"' + ''.join(pieces) + '"'
287
288
# Small test program and example

def test():
    # Build a four-step template mixing file-based and stdin/stdout
    # steps, turn debugging on so the generated shell command is
    # shown, and run it once.  The commands and the input path are
    # examples only and need not exist on a given system.
    print('Testing...')
    t = Template().init()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')