blob: 13bfcdd22f031330d70ef3b73d521936cc9c2ef3 [file] [log] [blame]
"""CGI-savvy HTTP Server.

This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
os.popen2() is used as a fallback, with slightly altered semantics; if
that function is not present either (e.g. on Macintosh), only Python
scripts are supported, and they are executed by the current process.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).
"""
20
21
# Version string of this module.
__version__ = "0.4"

# Names exported by a star-import of this module.
__all__ = ["CGIHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000025
26import os
Guido van Rossume7d6b0a2000-09-19 04:01:01 +000027import sys
Guido van Rossume7e578f1995-08-04 04:00:20 +000028import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
Steve Holden8a978f72003-01-08 18:53:18 +000031import select
Guido van Rossume7e578f1995-08-04 04:00:20 +000032
33
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics: which of these os functions exist
    # decides how run_cgi() executes a script (fork/exec, popen2/popen3,
    # or in-process execution as a last resort).
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL directories that is_cgi() accepts as holding CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Runs the CGI script when the request path is a CGI path,
        otherwise defers to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and sets the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        The path is normalized with _url_collapse_path_split() and the
        request is treated as CGI only when the resulting head (everything
        before the final '/') is exactly one of the strings in
        self.cgi_directories.

        NOTE(review): because the match is on the whole head, a URL with
        further '/' characters after the CGI directory (a script in a
        subdirectory, trailing path info, or a '/' inside the query
        string) is not recognized as CGI here.

        Any IndexError raised by _url_collapse_path_split() for a path
        with too many '..' components propagates to the caller.
        """
        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, sends
        a 404/403 error if it is missing, not a plain file, or cannot be
        run on this platform, and otherwise builds the CGI environment,
        sends a 200 response line and runs the script via fork/exec,
        popen2/popen3, or in-process execution, depending on what the
        platform offers.
        """
        path = self.path
        dir, rest = self.cgi_info

        # Descend into nested directories: as long as the next path
        # component names a real directory, make it part of 'dir'.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            # Non-Python scripts need a way to spawn a subprocess.
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = {}
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Split on the first ':' only -- the password part
                        # of "user:password" may itself contain colons.
                        authorization = authorization.split(':', 1)
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Line starts with whitespace: take it as a whole.
                accept.append(line.strip())
            else:
                # Drop the first 7 characters (the length of "Accept:").
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        os.environ.update(env)

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                # A query without '=' is passed as a command line argument.
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it can't change.
                    pass
                # Wire the request socket to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, os.environ)
            except:
                # Deliberately catch everything: on any failure the child
                # reports the error and exits instead of continuing.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        elif self.have_popen2 or self.have_popen3:
            # Windows -- use popen2 or popen3 to create a subprocess
            import shutil
            if self.have_popen3:
                popenx = os.popen3
            else:
                popenx = os.popen2
            cmdline = scriptfile
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = "%s -u %s" % (interp, cmdline)
            # NOTE(review): the command line is assembled as a string from
            # the request's query; only '=' and '"' are screened out here.
            if '=' not in query and '"' not in query:
                cmdline = '%s "%s"' % (cmdline, query)
            self.log_message("command: %s", cmdline)
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: forward no body.
                nbytes = 0
            files = popenx(cmdline, 'b')
            fi = files[0]
            fo = files[1]
            if self.have_popen3:
                fe = files[2]
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
                fi.write(data)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            fi.close()
            shutil.copyfileobj(fo, self.wfile)
            if self.have_popen3:
                errors = fe.read()
                fe.close()
                if errors:
                    self.log_error('%s', errors)
            sts = fo.close()
            if sts:
                self.log_error("CGI script exit status %#x", sts)
            else:
                self.log_message("CGI script exited OK")

        else:
            # Other O.S. -- execute script in this process
            save_argv = sys.argv
            save_stdin = sys.stdin
            save_stdout = sys.stdout
            save_stderr = sys.stderr
            try:
                save_cwd = os.getcwd()
                try:
                    sys.argv = [scriptfile]
                    if '=' not in decoded_query:
                        sys.argv.append(decoded_query)
                    sys.stdout = self.wfile
                    sys.stdin = self.rfile
                    execfile(scriptfile, {"__name__": "__main__"})
                finally:
                    # Undo everything the script may have altered.
                    sys.argv = save_argv
                    sys.stdin = save_stdin
                    sys.stdout = save_stdout
                    sys.stderr = save_stderr
                    os.chdir(save_cwd)
            except SystemExit as sts:
                self.log_error("CGI script exit status %s", str(sts))
            else:
                self.log_message("CGI script exited OK")
Guido van Rossume7e578f1995-08-04 04:00:20 +0000324
325
Gregory P. Smith923ba362009-04-06 06:33:26 +0000326# TODO(gregory.p.smith): Move this into an appropriate library.
327def _url_collapse_path_split(path):
328 """
329 Given a URL path, remove extra '/'s and '.' path elements and collapse
330 any '..' references.
331
332 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
333
334 Returns: A tuple of (head, tail) where tail is everything after the final /
335 and head is everything before it. Head will always start with a '/' and,
336 if it contains anything else, never have a trailing '/'.
337
338 Raises: IndexError if too many '..' occur within the path.
339 """
340 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
341 # path semantics rather than local operating system semantics.
342 path_parts = []
343 for part in path.split('/'):
344 if part == '.':
345 path_parts.append('')
346 else:
347 path_parts.append(part)
348 # Filter out blank non trailing parts before consuming the '..'.
349 path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
350 if path_parts:
351 tail_part = path_parts.pop()
352 else:
353 tail_part = ''
354 head_parts = []
355 for part in path_parts:
356 if part == '..':
357 head_parts.pop()
358 else:
359 head_parts.append(part)
360 if tail_part and tail_part == '..':
361 head_parts.pop()
362 tail_part = ''
363 return ('/' + '/'.join(head_parts), tail_part)
364
365
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is set.  Otherwise looks up the
    'nobody' account in the password database; when there is no such
    account, uses one more than the largest uid in the database.  The
    result is stored in the module-level 'nobody'.  Returns -1 (without
    caching) when the pwd module cannot be imported.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max([entry[2] for entry in pwd.getpwall()])
    return nobody
382
383
def executable(path):
    """Test for executable file.

    Returns True when any of the execute permission bits (owner, group
    or other) is set on path, and False when none is set or when path
    cannot be stat()ed.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return bool(mode & 0o111)
Guido van Rossume7e578f1995-08-04 04:00:20 +0000391
392
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run SimpleHTTPServer.test() with the CGI-capable handler by default.

    HandlerClass and ServerClass are passed through unchanged.
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


# Start the test server when this file is run as a script.
if __name__ == '__main__':
    test()