blob: 42970538168364afd8354e17dcbafcf4a74ac9fe [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Conversion pipeline templates.
2
3The problem:
4------------
5
Fred Drake0f715d22001-07-20 18:53:34 +00006Suppose you have some data that you want to convert to another format,
7such as from GIF image format to PPM image format. Maybe the
Guido van Rossum54f22ed2000-02-04 15:10:34 +00008conversion involves several steps (e.g. piping it through compress or
9uuencode). Some of the conversion steps may require that their input
10is a disk file, others may be able to read standard input; similar for
11their output. The input to the entire conversion may also be read
12from a disk file or from an open file, and similar for its output.
13
14The module lets you construct a pipeline template by sticking one or
15more conversion steps together. It will take care of creating and
16removing temporary files if they are necessary to hold intermediate
17data. You can then use the template to do conversions from many
18different sources to many different destinations. The temporary
19file names used are different each time the template is used.
20
21The templates are objects so you can create templates for many
22different conversion steps and store them in a dictionary, for
23instance.
Guido van Rossum2db91351992-10-18 17:09:59 +000024
25
Guido van Rossum54f22ed2000-02-04 15:10:34 +000026Directions:
27-----------
Guido van Rossum2db91351992-10-18 17:09:59 +000028
Guido van Rossum54f22ed2000-02-04 15:10:34 +000029To create a template:
30 t = Template()
Guido van Rossum2db91351992-10-18 17:09:59 +000031
Guido van Rossum54f22ed2000-02-04 15:10:34 +000032To add a conversion step to a template:
33 t.append(command, kind)
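where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  Two further kinds are accepted: '.-'
marks a source, which must be the first step and writes standard
output (it can only be prepended), and '-.' marks a sink, which must
be the last step and reads standard input (it can only be appended).
The command must be valid /bin/sh syntax.  If input or output files
are required, they are passed as $IN and $OUT; otherwise, it must be
possible to use the command in a pipeline.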
40
41To add a conversion step at the beginning:
42 t.prepend(command, kind)
43
44To convert a file to another file using a template:
45 sts = t.copy(infile, outfile)
46If infile or outfile are the empty string, standard input is read or
47standard output is written, respectively. The return value is the
48exit status of the conversion pipeline.
49
50To open a file for reading or writing through a conversion pipeline:
51 fp = t.open(file, mode)
52where mode is 'r' to read the file, or 'w' to write it -- just like
53for the built-in function open() or for os.popen().
54
55To create a new template object initialized to a given one:
56 t2 = t.clone()
Skip Montanaro352674d2001-02-07 23:14:30 +000057""" # '
Guido van Rossum2db91351992-10-18 17:09:59 +000058
59
Guido van Rossum9694fca1997-10-22 21:00:49 +000060import re
Guido van Rossum2db91351992-10-18 17:09:59 +000061import os
62import tempfile
63import string
64
Skip Montanaro352674d2001-02-07 23:14:30 +000065__all__ = ["Template"]
Guido van Rossum2db91351992-10-18 17:09:59 +000066
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it delivers its output.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of (command, kind) steps in
    self.steps.  The steps are turned into a single shell command line by
    the makepipeline() method, which open_r(), open_w() and copy() hand to
    os.popen() or os.system().
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template do not leak into
        # the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    @staticmethod
    def _check_cmd_and_kind(caller, cmd, kind):
        """Raise unless cmd is a string and kind is a known step kind.

        caller is the public method name used in the error message.
        """
        # isinstance() rather than an exact type comparison, so that
        # subclasses of str are accepted as commands too.
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % caller)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (caller, kind))

    @staticmethod
    def _check_placeholders(caller, cmd, kind):
        """Raise ValueError if a file-based kind lacks $IN / $OUT in cmd.

        caller is the public method name used in the error message.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % caller)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % caller)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SOURCE, the template already ends with a SINK
        step, or cmd lacks the $IN / $OUT reference its kind requires.
        """
        self._check_cmd_and_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, is SINK, the template already begins with a SOURCE
        step, or cmd lacks the $IN / $OUT reference its kind requires.
        """
        self._check_cmd_and_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly with open(file, 'r').
        Otherwise the pipeline is built with file as its input and no
        output file, and the result of os.popen(cmd, 'r') is returned.
        Raises ValueError if the last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly with open(file, 'w').
        Otherwise the pipeline is built with no input file and file as
        its output, and the result of os.popen(cmd, 'w') is returned.
        Raises ValueError if the first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for the generated
        command."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        string for this template's steps.

        The string is produced by the module-level makepipeline()
        function.  When debugging is on, the command is printed and
        prefixed with 'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
Guido van Rossum2db91351992-10-18 17:09:59 +0000177
178
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile name the files at the two ends of the pipeline;
    an empty string leaves that end unredirected.  steps is a sequence of
    (command, kind) pairs; it is not modified.

    Adjacent steps are joined with a pipe when both sides can stream.
    When either side needs a real file, a temporary file is created with
    tempfile.mkstemp() while the command is being built, and the steps
    are run one after the other.  In that case the returned text starts
    with a trap that removes the temporary files on signals
    1 2 3 13 14 15 and ends with an 'rm -f' of the same files.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a stage that needs a real
    # file but has none gets a 'cat' put next to it, so the connection is
    # made through a temporary file below.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)    # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach the file names to each command: as $IN / $OUT assignments
    # for file-based ends, as shell redirections for streaming ends.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is fed by a pipe
    # from the previous one, any other stage starts a new command line.
    parts = [stages[0][1]]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group the assignment and the command so the pipe feeds
                # the whole stage.
                cmd = '{ ' + cmd + '; }'
            parts.append(' |\n' + cmd)
        else:
            parts.append('\n' + cmd)
    cmdlist = ''.join(parts)

    # Remove the temporary files both on normal completion and when the
    # shell is interrupted.
    if garbage:
        rmcmd = 'rm -f ' + ' '.join(quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist
Guido van Rossum2db91351992-10-18 17:09:59 +0000246
247
# Reliably quote a string as a single argument for /bin/sh

# Characters that need no quoting
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')


def quote(file):
    """Return a shell-escaped version of the file string.

    A string made up only of safe characters is returned unchanged, and
    the empty string becomes ''.  Anything else is wrapped in single
    quotes.
    """
    if all(ch in _safechars for ch in file):
        # Nothing to escape; an empty argument still has to be spelled
        # out so that it does not vanish from the command line.
        return file or "''"
    # Wrap in single quotes; an embedded single quote is written by
    # closing the quotes, adding the quote inside double quotes, and
    # reopening.  The string $'b is then quoted as '$'"'"'b'
    escaped = file.replace("'", "'\"'\"'")
    return "'" + escaped + "'"