blob: 5620083c69347c084e1c0a97d5d6dc075d3d9bda [file] [log] [blame]
"""CGI-savvy HTTP Server.

This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
os.popen2() is used as a fallback, with slightly altered semantics; if
that function is not present either (e.g. on Macintosh), only Python
scripts are supported, and they are executed by the current process.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).
"""
20
21
# Version string of this module.
__version__ = "0.4"

# Public API: the request handler class is the only exported name.
__all__ = ["CGIHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000025
26import os
Guido van Rossume7d6b0a2000-09-19 04:01:01 +000027import sys
Guido van Rossume7e578f1995-08-04 04:00:20 +000028import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
Steve Holden8a978f72003-01-08 18:53:18 +000031import select
Senthil Kumarana9bd0cc2010-10-03 18:16:52 +000032import copy
Guido van Rossume7e578f1995-08-04 04:00:20 +000033
34
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is treated as a CGI request when the first component of
    its collapsed URL path is listed in ``cgi_directories``.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).
        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the second '/', i.e. right after the first path
        # component: head is that component (with its leading '/'),
        # tail is everything after the separator.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    # URL directories whose contents are treated as CGI scripts by is_cgi().
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The test is purely by file extension (".py" or ".pyw",
        compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Works from the (dir, rest) tuple that is_cgi() stored in
        self.cgi_info:

        1. Locate the script: extend dir through every leading
           component of rest that is an existing directory, split off
           the query string, and take the next component as the script
           name (whatever follows becomes PATH_INFO).
        2. Reject the request with 404/403 if the script does not
           exist, is not a plain file, or is a non-Python script that
           is not executable.
        3. Build the CGI environment from a copy of os.environ plus
           the request data.
        4. Send "200 Script output follows" and run the script: via
           fork/execve when os.fork exists, otherwise via
           subprocess.Popen.

        Returns None in every case.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            auth_fields = authorization.split()
            if len(auth_fields) == 2:
                import base64
                import binascii
                scheme, credentials = auth_fields
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    try:
                        credentials = base64.decodestring(credentials)
                    except binascii.Error:
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Split on the first ':' only.  The password part
                        # of "user:password" may itself contain colons;
                        # an unbounded split would then yield more than
                        # two fields and REMOTE_USER would be dropped.
                        userpass = credentials.split(':', 1)
                        if len(userpass) == 2:
                            env['REMOTE_USER'] = userpass[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Skip the 7-character "Accept:" prefix.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = [cookie for cookie in self.headers.getheaders('cookie') if cookie]
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it cannot
                    # be changed.
                    pass
                # Wire the client connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catches everything: whatever went wrong,
                # the child reports it and exits with status 127.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body is read.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
Guido van Rossume7e578f1995-08-04 04:00:20 +0000296
297
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800298def _url_collapse_path(path):
Gregory P. Smith923ba362009-04-06 06:33:26 +0000299 """
300 Given a URL path, remove extra '/'s and '.' path elements and collapse
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800301 any '..' references and returns a colllapsed path.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000302
303 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800304 The utility of this function is limited to is_cgi method and helps
305 preventing some security attacks.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000306
Martin Panter74c76c82015-10-03 05:55:46 +0000307 Returns: The reconstituted URL, which will always start with a '/'.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000308
309 Raises: IndexError if too many '..' occur within the path.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800310
Gregory P. Smith923ba362009-04-06 06:33:26 +0000311 """
Martin Panter74c76c82015-10-03 05:55:46 +0000312 # Query component should not be involved.
313 path, _, query = path.partition('?')
314 path = urllib.unquote(path)
315
Gregory P. Smith923ba362009-04-06 06:33:26 +0000316 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
317 # path semantics rather than local operating system semantics.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800318 path_parts = path.split('/')
319 head_parts = []
320 for part in path_parts[:-1]:
321 if part == '..':
322 head_parts.pop() # IndexError if more '..' than prior parts
323 elif part and part != '.':
324 head_parts.append( part )
Gregory P. Smith923ba362009-04-06 06:33:26 +0000325 if path_parts:
Senthil Kumaranfb2e8742012-04-11 03:07:57 +0800326 tail_part = path_parts.pop()
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800327 if tail_part:
328 if tail_part == '..':
329 head_parts.pop()
330 tail_part = ''
331 elif tail_part == '.':
332 tail_part = ''
Gregory P. Smith923ba362009-04-06 06:33:26 +0000333 else:
334 tail_part = ''
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800335
Martin Panter74c76c82015-10-03 05:55:46 +0000336 if query:
337 tail_part = '?'.join((tail_part, query))
338
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800339 splitpath = ('/' + '/'.join(head_parts), tail_part)
340 collapsed_path = "/".join(splitpath)
341
342 return collapsed_path
Gregory P. Smith923ba362009-04-06 06:33:26 +0000343
344
# Cached result of nobody_uid(); stays None until a uid has been found.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    The uid is looked up once and cached in the module-level ``nobody``.
    If the password database has no 'nobody' entry, one more than the
    highest uid found there is used instead.  Returns -1 (without
    caching) when the pwd module cannot be imported.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # Index 2 of each password entry is read as the uid.
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
361
362
def executable(path):
    """Test for executable file.

    Return True if *path* can be stat'ed and at least one of its
    execute permission bits (owner, group or other) is set.  Return
    False otherwise, including when os.stat() fails.
    """
    try:
        st = os.stat(path)
    except os.error:
        return False
    # 0o111 is the three execute bits (owner, group, other).  It is
    # written with the 0o prefix because the legacy spelling 0111 is a
    # syntax error on Python 3; 0o111 is valid on Python 2.6+ and 3.
    return st.st_mode & 0o111 != 0
Guido van Rossume7e578f1995-08-04 04:00:20 +0000370
371
def test(HandlerClass=CGIHTTPRequestHandler,
         ServerClass=BaseHTTPServer.HTTPServer):
    """Run the SimpleHTTPServer test driver with the CGI-aware handler.

    Both arguments are passed straight through to SimpleHTTPServer.test().
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


# Start the test server when the module is executed as a script.
if __name__ == '__main__':
    test()