| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 1 | # Conversion pipeline templates | 
 | 2 | # ============================= | 
 | 3 |  | 
 | 4 |  | 
 | 5 | # The problem: | 
 | 6 | # ------------ | 
 | 7 | #  | 
 | 8 | # Suppose you have some data that you want to convert to another format | 
 | 9 | # (e.g. from GIF image format to PPM image format).  Maybe the | 
 | 10 | # conversion involves several steps (e.g. piping it through compress or | 
 | 11 | # uuencode).  Some of the conversion steps may require that their input | 
 | 12 | # is a disk file, others may be able to read standard input; similar for | 
 | 13 | # their output.  The input to the entire conversion may also be read | 
 | 14 | # from a disk file or from an open file, and similar for its output. | 
 | 15 | #  | 
 | 16 | # The module lets you construct a pipeline template by sticking one or | 
 | 17 | # more conversion steps together.  It will take care of creating and | 
 | 18 | # removing temporary files if they are necessary to hold intermediate | 
 | 19 | # data.  You can then use the template to do conversions from many | 
 | 20 | # different sources to many different destinations.  The temporary | 
 | 21 | # file names used are different each time the template is used. | 
 | 22 | # | 
 | 23 | # The templates are objects so you can create templates for many | 
 | 24 | # different conversion steps and store them in a dictionary, for | 
 | 25 | # instance. | 
 | 26 |  | 
 | 27 |  | 
 | 28 | # Directions: | 
 | 29 | # ----------- | 
 | 30 | # | 
 | 31 | # To create a template: | 
| Guido van Rossum | 7bc817d | 1993-12-17 15:25:27 +0000 | [diff] [blame] | 32 | #   t = Template() | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 33 | # | 
 | 34 | # To add a conversion step to a template: | 
 | 35 | #   t.append(command, kind) | 
 | 36 | # where kind is a string of two characters: the first is '-' if the | 
 | 37 | # command reads its standard input or 'f' if it requires a file; the | 
 | 38 | # second likewise for the output. The command must be valid /bin/sh | 
 | 39 | # syntax.  If input or output files are required, they are passed as | 
 | 40 | # $IN and $OUT; otherwise, it must be possible to use the command in | 
 | 41 | # a pipeline. | 
 | 42 | # | 
 | 43 | # To add a conversion step at the beginning: | 
 | 44 | #   t.prepend(command, kind) | 
 | 45 | # | 
 | 46 | # To convert a file to another file using a template: | 
 | 47 | #   sts = t.copy(infile, outfile) | 
 | 48 | # If infile or outfile are the empty string, standard input is read or | 
 | 49 | # standard output is written, respectively.  The return value is the | 
 | 50 | # exit status of the conversion pipeline. | 
 | 51 | #  | 
 | 52 | # To open a file for reading or writing through a conversion pipeline: | 
 | 53 | #   fp = t.open(file, mode) | 
 | 54 | # where mode is 'r' to read the file, or 'w' to write it -- just like | 
 | 55 | # for the built-in function open() or for os.popen(). | 
 | 56 | # | 
 | 57 | # To create a new template object initialized to a given one: | 
 | 58 | #   t2 = t.clone() | 
 | 59 | # | 
 | 60 | # For an example, see the function test() at the end of the file. | 
 | 61 |  | 
 | 62 |  | 
 | 63 | import sys | 
| Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame^] | 64 | import re | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 65 |  | 
 | 66 | import os | 
 | 67 | import tempfile | 
 | 68 | import string | 
 | 69 |  | 
 | 70 |  | 
 | 71 | # Conversion step kinds | 
 | 72 |  | 
 | 73 | FILEIN_FILEOUT = 'ff'			# Must read & write real files | 
 | 74 | STDIN_FILEOUT  = '-f'			# Must write a real file | 
 | 75 | FILEIN_STDOUT  = 'f-'			# Must read a real file | 
 | 76 | STDIN_STDOUT   = '--'			# Normal pipeline element | 
 | 77 | SOURCE         = '.-'			# Must be first, writes stdout | 
 | 78 | SINK           = '-.'			# Must be last, reads stdin | 
 | 79 |  | 
 | 80 | stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ | 
 | 81 | 	     SOURCE, SINK] | 
 | 82 |  | 
 | 83 |  | 
 | 84 | # A pipeline template is a Template object: | 
 | 85 |  | 
 | 86 | class Template: | 
 | 87 |  | 
| Guido van Rossum | 7bc817d | 1993-12-17 15:25:27 +0000 | [diff] [blame] | 88 | 	# Template() returns a fresh pipeline template | 
 | 89 | 	def __init__(self): | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 90 | 		self.debugging = 0 | 
 | 91 | 		self.reset() | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 92 |  | 
 | 93 | 	# t.__repr__() implements `t` | 
 | 94 | 	def __repr__(self): | 
 | 95 | 		return '<Template instance, steps=' + `self.steps` + '>' | 
 | 96 |  | 
 | 97 | 	# t.reset() restores a pipeline template to its initial state | 
 | 98 | 	def reset(self): | 
 | 99 | 		self.steps = [] | 
 | 100 |  | 
 | 101 | 	# t.clone() returns a new pipeline template with identical | 
 | 102 | 	# initial state as the current one | 
 | 103 | 	def clone(self): | 
| Guido van Rossum | 7bc817d | 1993-12-17 15:25:27 +0000 | [diff] [blame] | 104 | 		t = Template() | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 105 | 		t.steps = self.steps[:] | 
 | 106 | 		t.debugging = self.debugging | 
 | 107 | 		return t | 
 | 108 |  | 
 | 109 | 	# t.debug(flag) turns debugging on or off | 
 | 110 | 	def debug(self, flag): | 
 | 111 | 		self.debugging = flag | 
 | 112 |  | 
 | 113 | 	# t.append(cmd, kind) adds a new step at the end | 
 | 114 | 	def append(self, cmd, kind): | 
 | 115 | 		if type(cmd) <> type(''): | 
 | 116 | 			raise TypeError, \ | 
 | 117 | 			      'Template.append: cmd must be a string' | 
 | 118 | 		if kind not in stepkinds: | 
 | 119 | 			raise ValueError, \ | 
 | 120 | 			      'Template.append: bad kind ' + `kind` | 
 | 121 | 		if kind == SOURCE: | 
 | 122 | 			raise ValueError, \ | 
 | 123 | 			      'Template.append: SOURCE can only be prepended' | 
 | 124 | 		if self.steps <> [] and self.steps[-1][1] == SINK: | 
 | 125 | 			raise ValueError, \ | 
 | 126 | 			      'Template.append: already ends with SINK' | 
| Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame^] | 127 | 		if kind[0] == 'f' and not re.search('\$IN\b', cmd): | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 128 | 			raise ValueError, \ | 
 | 129 | 			      'Template.append: missing $IN in cmd' | 
| Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame^] | 130 | 		if kind[1] == 'f' and not re.search('\$OUT\b', cmd): | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 131 | 			raise ValueError, \ | 
 | 132 | 			      'Template.append: missing $OUT in cmd' | 
 | 133 | 		self.steps.append((cmd, kind)) | 
 | 134 |  | 
 | 135 | 	# t.prepend(cmd, kind) adds a new step at the front | 
 | 136 | 	def prepend(self, cmd, kind): | 
 | 137 | 		if type(cmd) <> type(''): | 
 | 138 | 			raise TypeError, \ | 
 | 139 | 			      'Template.prepend: cmd must be a string' | 
 | 140 | 		if kind not in stepkinds: | 
 | 141 | 			raise ValueError, \ | 
 | 142 | 			      'Template.prepend: bad kind ' + `kind` | 
 | 143 | 		if kind == SINK: | 
 | 144 | 			raise ValueError, \ | 
 | 145 | 			      'Template.prepend: SINK can only be appended' | 
 | 146 | 		if self.steps <> [] and self.steps[0][1] == SOURCE: | 
 | 147 | 			raise ValueError, \ | 
 | 148 | 			      'Template.prepend: already begins with SOURCE' | 
| Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame^] | 149 | 		if kind[0] == 'f' and not re.search('\$IN\b', cmd): | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 150 | 			raise ValueError, \ | 
 | 151 | 			      'Template.prepend: missing $IN in cmd' | 
| Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame^] | 152 | 		if kind[1] == 'f' and not re.search('\$OUT\b', cmd): | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 153 | 			raise ValueError, \ | 
 | 154 | 			      'Template.prepend: missing $OUT in cmd' | 
 | 155 | 		self.steps.insert(0, (cmd, kind)) | 
 | 156 |  | 
 | 157 | 	# t.open(file, rw) returns a pipe or file object open for | 
 | 158 | 	# reading or writing; the file is the other end of the pipeline | 
 | 159 | 	def open(self, file, rw): | 
 | 160 | 		if rw == 'r': | 
 | 161 | 			return self.open_r(file) | 
 | 162 | 		if rw == 'w': | 
 | 163 | 			return self.open_w(file) | 
 | 164 | 		raise ValueError, \ | 
 | 165 | 		      'Template.open: rw must be \'r\' or \'w\', not ' + `rw` | 
 | 166 |  | 
 | 167 | 	# t.open_r(file) and t.open_w(file) implement | 
 | 168 | 	# t.open(file, 'r') and t.open(file, 'w') respectively | 
 | 169 |  | 
 | 170 | 	def open_r(self, file): | 
 | 171 | 		if self.steps == []: | 
 | 172 | 			return open(file, 'r') | 
 | 173 | 		if self.steps[-1][1] == SINK: | 
 | 174 | 			raise ValueError, \ | 
 | 175 | 			      'Template.open_r: pipeline ends width SINK' | 
 | 176 | 		cmd = self.makepipeline(file, '') | 
 | 177 | 		return os.popen(cmd, 'r') | 
 | 178 |  | 
 | 179 | 	def open_w(self, file): | 
 | 180 | 		if self.steps == []: | 
 | 181 | 			return open(file, 'w') | 
 | 182 | 		if self.steps[0][1] == SOURCE: | 
 | 183 | 			raise ValueError, \ | 
 | 184 | 			      'Template.open_w: pipeline begins with SOURCE' | 
 | 185 | 		cmd = self.makepipeline('', file) | 
 | 186 | 		return os.popen(cmd, 'w') | 
 | 187 |  | 
 | 188 | 	def copy(self, infile, outfile): | 
 | 189 | 		return os.system(self.makepipeline(infile, outfile)) | 
 | 190 |  | 
 | 191 | 	def makepipeline(self, infile, outfile): | 
 | 192 | 		cmd = makepipeline(infile, self.steps, outfile) | 
 | 193 | 		if self.debugging: | 
 | 194 | 			print cmd | 
 | 195 | 			cmd = 'set -x; ' + cmd | 
 | 196 | 		return cmd | 
 | 197 |  | 
 | 198 |  | 
 | 199 | def makepipeline(infile, steps, outfile): | 
 | 200 | 	# Build a list with for each command: | 
 | 201 | 	# [input filename or '', command string, kind, output filename or ''] | 
 | 202 | 	 | 
 | 203 | 	list = [] | 
 | 204 | 	for cmd, kind in steps: | 
 | 205 | 		list.append(['', cmd, kind, '']) | 
 | 206 | 	# | 
 | 207 | 	# Make sure there is at least one step | 
 | 208 | 	# | 
 | 209 | 	if list == []: | 
 | 210 | 		list.append(['', 'cat', '--', '']) | 
 | 211 | 	# | 
 | 212 | 	# Take care of the input and output ends | 
 | 213 | 	# | 
 | 214 | 	[cmd, kind] = list[0][1:3] | 
 | 215 | 	if kind[0] == 'f' and not infile: | 
 | 216 | 		list.insert(0, ['', 'cat', '--', '']) | 
 | 217 | 	list[0][0] = infile | 
 | 218 | 	# | 
 | 219 | 	[cmd, kind] = list[-1][1:3] | 
 | 220 | 	if kind[1] == 'f' and not outfile: | 
 | 221 | 		list.append(['', 'cat', '--', '']) | 
 | 222 | 	list[-1][-1] = outfile | 
 | 223 | 	# | 
 | 224 | 	# Invent temporary files to connect stages that need files | 
 | 225 | 	# | 
 | 226 | 	garbage = [] | 
 | 227 | 	for i in range(1, len(list)): | 
 | 228 | 		lkind = list[i-1][2] | 
 | 229 | 		rkind = list[i][2] | 
 | 230 | 		if lkind[1] == 'f' or rkind[0] == 'f': | 
 | 231 | 			temp = tempfile.mktemp() | 
 | 232 | 			garbage.append(temp) | 
 | 233 | 			list[i-1][-1] = list[i][0] = temp | 
 | 234 | 	# | 
 | 235 | 	for item in list: | 
 | 236 | 		[inf, cmd, kind, outf] = item | 
 | 237 | 		if kind[1] == 'f': | 
 | 238 | 			cmd = 'OUT=' + quote(outf) + '; ' + cmd | 
 | 239 | 		if kind[0] == 'f': | 
 | 240 | 			cmd = 'IN=' + quote(inf) + '; ' + cmd | 
 | 241 | 		if kind[0] == '-' and inf: | 
 | 242 | 			cmd = cmd + ' <' + quote(inf) | 
 | 243 | 		if kind[1] == '-' and outf: | 
 | 244 | 			cmd = cmd + ' >' + quote(outf) | 
 | 245 | 		item[1] = cmd | 
 | 246 | 	# | 
 | 247 | 	cmdlist = list[0][1] | 
 | 248 | 	for item in list[1:]: | 
 | 249 | 		[cmd, kind] = item[1:3] | 
 | 250 | 		if item[0] == '': | 
 | 251 | 			if 'f' in kind: | 
 | 252 | 				cmd = '{ ' + cmd + '; }' | 
 | 253 | 			cmdlist = cmdlist + ' |\n' + cmd | 
 | 254 | 		else: | 
 | 255 | 			cmdlist = cmdlist + '\n' + cmd | 
 | 256 | 	# | 
 | 257 | 	if garbage: | 
 | 258 | 		rmcmd = 'rm -f' | 
 | 259 | 		for file in garbage: | 
 | 260 | 			rmcmd = rmcmd + ' ' + quote(file) | 
 | 261 | 		trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15' | 
 | 262 | 		cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd | 
 | 263 | 	# | 
 | 264 | 	return cmdlist | 
 | 265 |  | 
 | 266 |  | 
 | 267 | # Reliably quote a string as a single argument for /bin/sh | 
 | 268 |  | 
 | 269 | _safechars = string.letters + string.digits + '!@%_-+=:,./'	# Safe unquoted | 
 | 270 | _funnychars = '"`$\\'				# Unsafe inside "double quotes" | 
 | 271 |  | 
 | 272 | def quote(file): | 
 | 273 | 	for c in file: | 
 | 274 | 		if c not in _safechars: | 
 | 275 | 			break | 
 | 276 | 	else: | 
 | 277 | 		return file | 
 | 278 | 	if '\'' not in file: | 
 | 279 | 		return '\'' + file + '\'' | 
 | 280 | 	res = '' | 
 | 281 | 	for c in file: | 
 | 282 | 		if c in _funnychars: | 
 | 283 | 			c = '\\' + c | 
 | 284 | 		res = res + c | 
 | 285 | 	return '"' + res + '"' | 
 | 286 |  | 
 | 287 |  | 
 | 288 | # Small test program and example | 
 | 289 |  | 
 | 290 | def test(): | 
 | 291 | 	import os | 
 | 292 | 	print 'Testing...' | 
| Guido van Rossum | 7bc817d | 1993-12-17 15:25:27 +0000 | [diff] [blame] | 293 | 	t = Template() | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 294 | 	t.append('togif $IN $OUT', 'ff') | 
 | 295 | 	t.append('giftoppm', '--') | 
 | 296 | 	t.append('ppmtogif >$OUT', '-f') | 
 | 297 | 	t.append('fromgif $IN $OUT', 'ff') | 
 | 298 | 	t.debug(1) | 
 | 299 | 	FILE = '/usr/local/images/rgb/rogues/guido.rgb' | 
 | 300 | 	t.copy(FILE, '@temp') | 
 | 301 | 	print 'Done.' |