# Source blob: 693309fff0103029ba05a45457f944b6cf39420b
"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
"""                                     # '


import re
import os
import tempfile
# we import the quote function rather than the module for backward compat
# (quote used to be an undocumented but used function in pipes)
from shlex import quote

__all__ = ["Template"]

# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it produces its output
# ('f' = needs a real file, '-' = uses stdin/stdout, '.' = that end of the
# step is not connected, which is why such a step must be first or last).

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT  = '-f'                   # Must write a real file
FILEIN_STDOUT  = 'f-'                   # Must read a real file
STDIN_STDOUT   = '--'                   # Normal pipeline element
SOURCE         = '.-'                   # Must be first, writes stdout
SINK           = '-.'                   # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


class Template:
    """Class representing a pipeline template.

    A template is an ordered list of conversion steps kept in
    ``self.steps``; each step is a ``(cmd, kind)`` tuple where *cmd* is a
    shell command string and *kind* is one of the module-level step kinds.
    ``self.debugging`` is the flag set by debug().
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template don't affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    @staticmethod
    def _check_step(where, cmd, kind):
        """Raise unless cmd is a string and kind is a known step kind.

        *where* is the public method name used in the error message.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    @staticmethod
    def _check_placeholders(where, cmd, kind):
        """Raise ValueError if a file-based end of the step lacks $IN/$OUT.

        *where* is the public method name used in the error message.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind is
        unknown, is SOURCE, the template already ends with a SINK step, or
        cmd lacks the $IN / $OUT placeholder its kind requires.
        """
        self._check_step('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind is
        unknown, is SINK, the template already begins with a SOURCE step, or
        cmd lacks the $IN / $OUT placeholder its kind requires.
        """
        self._check_step('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() using *file* as its input.  Raises
        ValueError if the last step is a SINK (nothing could be read).
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() using *file* as its output.  Raises
        ValueError if the first step is a SOURCE (nothing could be written).
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to outfile
        and returns the value of os.system() for the generated command."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """Return the shell command for this template's steps.

        The command is built by the module-level makepipeline() function.
        When debugging is on, the command is printed and prefixed with
        'set -x; '.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return the shell command text that runs *steps* from infile to outfile.

    infile and outfile are file names; a false value (e.g. '') means the
    pipeline's own standard input / standard output is used for that end.
    steps is a sequence of (cmd, kind) pairs.  Temporary files are created
    with tempfile.mkstemp() wherever two adjacent stages must be connected
    through a real file, and the returned text removes them again.
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a file-based end with no
    # file name gets a 'cat' stage in front of / behind it, so the file
    # stage is fed through a temporary file below.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)        # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach file names to each command: $IN/$OUT assignments for
    # file-based ends, shell redirections for stdin/stdout ends.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is piped from the
    # previous one; a stage reading a file starts a new command line.
    pieces = [stages[0][1]]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # The 'OUT=...; cmd' sequence must be grouped to sit in a pipe.
                cmd = '{ ' + cmd + '; }'
            pieces.append(' |\n' + cmd)
        else:
            pieces.append('\n' + cmd)
    cmdlist = ''.join(pieces)

    # Remove the temporary files at the end and on the listed signals.
    # NOTE(review): with temporary files the final command is the 'rm', so
    # the shell's exit status presumably reflects 'rm' rather than the
    # conversion steps -- confirm before relying on copy()'s return value.
    if garbage:
        rmcmd = 'rm -f ' + ' '.join(quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist