"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format. Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode). Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output. The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together. It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data. You can then use the template to do conversions from many
different sources to many different destinations. The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output. The command must be valid /bin/sh
syntax. If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively. The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
""" # '
Guido van Rossum2db91351992-10-18 17:09:59 +000060
61
Guido van Rossum9694fca1997-10-22 21:00:49 +000062import re
Guido van Rossum2db91351992-10-18 17:09:59 +000063
64import os
65import tempfile
66import string
67
Skip Montanaro352674d2001-02-07 23:14:30 +000068__all__ = ["Template"]
Guido van Rossum2db91351992-10-18 17:09:59 +000069
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes
# how the step takes its input, the second how it produces its output.

FILEIN_FILEOUT = 'ff'           # Must read & write real files
STDIN_FILEOUT = '-f'            # Must write a real file
FILEIN_STDOUT = 'f-'            # Must read a real file
STDIN_STDOUT = '--'             # Normal pipeline element
SOURCE = '.-'                   # Must be first, writes stdout
SINK = '-.'                     # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
Guido van Rossum2db91351992-10-18 17:09:59 +000081
82
class Template:
    """Class representing a pipeline template.

    A template is an ordered list of ``(cmd, kind)`` steps kept in
    ``self.steps``.  ``self.debugging`` is a flag; when true, the
    generated shell text is printed and run with ``set -x``.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template don't affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SOURCE, if the template
        already ends with a SINK step, or if a file-based kind is used
        without the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if a file-based kind is
        used without the matching $IN / $OUT reference in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively."""
        if not self.steps:
            # No conversion steps: just open the file directly.
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        Raises ValueError if the template begins with a SOURCE step.
        """
        if not self.steps:
            # No conversion steps: just open the file directly.
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for the command."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command text
        for this template, as built by the module-level makepipeline().

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
Guido van Rossum2db91351992-10-18 17:09:59 +0000180
181
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    steps is a sequence of (command, kind) pairs.  An empty infile or
    outfile means the pipeline's own standard input or standard output
    is used at that end.  Temporary files are created (via
    tempfile.mkstemp) wherever two adjacent stages must be connected
    through a real file; the returned text removes them at the end and
    from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a stage that needs a real
    # file but has none gets a plain 'cat' stage placed beside it.
    head_kind = stages[0][2]
    if head_kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    tail_kind = stages[-1][2]
    if tail_kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach $IN/$OUT assignments and stdin/stdout redirections
    for stage in stages:
        inf, cmd, kind, outf = stage
        prefix = ''
        if kind[0] == 'f':
            prefix = prefix + 'IN=' + quote(inf) + '; '
        if kind[1] == 'f':
            prefix = prefix + 'OUT=' + quote(outf) + '; '
        suffix = ''
        if kind[0] == '-' and inf:
            suffix = suffix + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            suffix = suffix + ' >' + quote(outf)
        stage[1] = prefix + cmd + suffix

    # Join the stages: a pipe where the stage reads the previous one's
    # stdout, a new command line where it reads a file instead.
    pieces = [stages[0][1]]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group so the variable assignment and the command
                # form a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            pieces.append(' |\n' + cmd)
        else:
            pieces.append('\n' + cmd)
    cmdlist = ''.join(pieces)

    # Remove temporary files on normal completion and on signals
    if garbage:
        rmcmd = 'rm -f' + ''.join(' ' + quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist
Guido van Rossum2db91351992-10-18 17:09:59 +0000249
250
# Reliably quote a string as a single argument for /bin/sh

_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'  # Safe unquoted
_funnychars = '"`$\\'  # Unsafe inside "double quotes"


def quote(file):
    """Return file quoted so /bin/sh reads it as exactly one argument.

    - The empty string becomes '' (two single quotes), so the argument
      does not vanish from the command line.
    - A string made only of characters in _safechars is returned as is.
    - A string without a single quote is wrapped in single quotes.
    - Otherwise it is wrapped in double quotes, with each character in
      _funnychars preceded by a backslash.
    """
    if file == '':
        # An unquoted empty string would disappear as a shell word.
        return "''"
    if all(c in _safechars for c in file):
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    escaped = ''.join('\\' + c if c in _funnychars else c for c in file)
    return '"' + escaped + '"'
Guido van Rossum2db91351992-10-18 17:09:59 +0000270
271
272# Small test program and example
273
def test():
    """Small example: build a four-step template, turn debugging on and
    run it from a sample image file to the file '@temp'."""
    print('Testing...')
    t = Template()
    example_steps = (
        ('togif $IN $OUT', 'ff'),
        ('giftoppm', '--'),
        ('ppmtogif >$OUT', '-f'),
        ('fromgif $IN $OUT', 'ff'),
    )
    for command, kind in example_steps:
        t.append(command, kind)
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')