blob: 6dcc997b4c7327471116aa56a186ebdd482c8c16 [file] [log] [blame]
"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format. Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode). Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output. The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together. It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data. You can then use the template to do conversions from many
different sources to many different destinations. The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output. The command must be valid /bin/sh
syntax. If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively. The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
""" # '
Guido van Rossum2db91351992-10-18 17:09:59 +000060
61
Guido van Rossum9694fca1997-10-22 21:00:49 +000062import re
Guido van Rossum2db91351992-10-18 17:09:59 +000063import os
64import tempfile
65import string
66
Skip Montanaro352674d2001-02-07 23:14:30 +000067__all__ = ["Template"]
Guido van Rossum2db91351992-10-18 17:09:59 +000068
# Conversion step kinds
#
# Each kind is a two-character string.  The first character describes how
# the step takes its input, the second how it produces its output:
#   'f'  the step needs a real file (passed to the command as $IN / $OUT)
#   '-'  the step uses standard input / standard output
#   '.'  the step has no input (SOURCE) or no output (SINK) at that end

FILEIN_FILEOUT = 'ff'           # Must read & write real files
STDIN_FILEOUT = '-f'            # Must write a real file
FILEIN_STDOUT = 'f-'            # Must read a real file
STDIN_STDOUT = '--'             # Normal pipeline element
SOURCE = '.-'                   # Must be first, writes stdout
SINK = '-.'                     # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
    SOURCE, SINK,
]
Guido van Rossum2db91351992-10-18 17:09:59 +000080
81
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in self.steps,
    each a (cmd, kind) tuple, plus a debugging flag.  The steps are
    turned into a /bin/sh command line on demand by the module-level
    makepipeline() function.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template don't affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SOURCE, if the template
        already ends with a SINK step, or if a file-based kind is used
        without the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # A step that needs real files must say where they go in the command.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SINK, if the template
        already begins with a SOURCE step, or if a file-based kind is
        used without the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # A step that needs real files must say where they go in the command.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the pipeline
        reads from file and the returned pipe yields its output.  Raises
        ValueError if the last step is a SINK (it produces no output).
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the returned
        pipe feeds the pipeline, whose output goes to file.  Raises
        ValueError if the first step is a SOURCE (it takes no input).
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to outfile.

        Returns whatever os.system() returns for the generated command.
        """
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command string.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell echoes each command as it runs.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
Guido van Rossum2db91351992-10-18 17:09:59 +0000193
194
def makepipeline(infile, steps, outfile):
    """Return a /bin/sh command string running steps from infile to outfile.

    infile and outfile are file names; an empty string means the
    pipeline's own standard input / standard output.  steps is a
    sequence of (cmd, kind) pairs as stored in Template.steps.

    Temporary files are created (via tempfile.mkstemp) at the time this
    function is called, for every junction where either neighbour needs
    a real file.  The returned script removes them with 'rm -f' at the
    end and from a signal trap.
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends.  A file-reading first step
    # with no input file (or a file-writing last step with no output file)
    # gets a 'cat' in front of (behind) it to bridge to stdin (stdout).
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)        # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach file names to each command: as $IN/$OUT assignments for
    # file-based ends, as shell redirections for stdin/stdout ends.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage with no input file is piped from its
    # predecessor; a stage reading a (temporary) file starts a new line.
    cmdlist = stages[0][1]
    for inf, cmd, kind, outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group so the IN=/OUT= assignment and the command form
                # a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd

    # Remove the temporary files at the end and on the listed signals.
    if garbage:
        rmcmd = ' '.join(['rm -f'] + [quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist
Guido van Rossum2db91351992-10-18 17:09:59 +0000262
263
# Reliably quote a string as a single argument for /bin/sh

_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
_funnychars = '"`$\\'                           # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so /bin/sh treats it as exactly one argument.

    - The empty string is returned as '' (two single quotes); left bare
      it would vanish from the command line instead of being passed as
      an empty argument.
    - A string made only of _safechars is returned unchanged.
    - A string without a single quote is wrapped in single quotes.
    - Anything else is wrapped in double quotes, with each character in
      _funnychars escaped by a backslash.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        # Every character is safe: no quoting needed.
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    # The string contains a single quote, so fall back to double quotes
    # and backslash-escape what stays special inside them.
    parts = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        parts.append(c)
    return '"' + ''.join(parts) + '"'
282 return '"' + res + '"'