blob: 8f8ae56c93bedbe6c8f950dbbac0c0db56a22af4 [file] [log] [blame]
"""CGI-savvy HTTP Server.

This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).
"""
20
21
# Version string of this module.
__version__ = "0.4"

# Names exported by "from CGIHTTPServer import *": only the handler class.
__all__ = ["CGIHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000025
26import os
Guido van Rossume7d6b0a2000-09-19 04:01:01 +000027import sys
Guido van Rossume7e578f1995-08-04 04:00:20 +000028import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
Steve Holden8a978f72003-01-08 18:53:18 +000031import select
Senthil Kumarana9bd0cc2010-10-03 18:16:52 +000032import copy
Guido van Rossume7e578f1995-08-04 04:00:20 +000033
34
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # First URL path components that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are handed to run_cgi(); everything else is delegated
        to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation unquotes and collapses the URL path
        and tests whether its first component (everything before the
        second '/') is one of the strings in self.cgi_directories.
        """
        collapsed_path = _url_collapse_path(urllib.unquote(self.path))
        # The collapsed path always contains a '/' after position 0, so
        # this splits it into '/<first component>' and the remainder.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is based only on the file extension (.py or .pyw,
        compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, sends
        a 404/403 error if it is missing, not a plain file, or not
        runnable, and otherwise builds the CGI environment, sends status
        200 and runs the script.  With os.fork() available the script is
        exec'ed in a child process wired to the request's rfile/wfile;
        otherwise it is run through subprocess and its output is copied
        to the client.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Extend cgi_dir as long as the next path component is an
        # existing directory, so scripts in CGI subdirectories are found.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        pass
                    else:
                        # Split on the first ':' only: the password part
                        # of Basic credentials may itself contain colons.
                        authorization = authorization.split(':', 1)
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Strip each item so the blank after "Accept:" and the
                # trailing line terminator do not leak into HTTP_ACCEPT.
                accept.extend(
                    item.strip() for item in line[len('accept:'):].split(','))
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if we may not
                    # switch to "nobody".
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must report
                # the failure and exit, never return into the server.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body to forward.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
Guido van Rossume7e578f1995-08-04 04:00:20 +0000300
301
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800302def _url_collapse_path(path):
Gregory P. Smith923ba362009-04-06 06:33:26 +0000303 """
304 Given a URL path, remove extra '/'s and '.' path elements and collapse
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800305 any '..' references and returns a colllapsed path.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000306
307 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800308 The utility of this function is limited to is_cgi method and helps
309 preventing some security attacks.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000310
311 Returns: A tuple of (head, tail) where tail is everything after the final /
312 and head is everything before it. Head will always start with a '/' and,
313 if it contains anything else, never have a trailing '/'.
314
315 Raises: IndexError if too many '..' occur within the path.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800316
Gregory P. Smith923ba362009-04-06 06:33:26 +0000317 """
318 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
319 # path semantics rather than local operating system semantics.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800320 path_parts = path.split('/')
321 head_parts = []
322 for part in path_parts[:-1]:
323 if part == '..':
324 head_parts.pop() # IndexError if more '..' than prior parts
325 elif part and part != '.':
326 head_parts.append( part )
Gregory P. Smith923ba362009-04-06 06:33:26 +0000327 if path_parts:
Senthil Kumaranfb2e8742012-04-11 03:07:57 +0800328 tail_part = path_parts.pop()
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800329 if tail_part:
330 if tail_part == '..':
331 head_parts.pop()
332 tail_part = ''
333 elif tail_part == '.':
334 tail_part = ''
Gregory P. Smith923ba362009-04-06 06:33:26 +0000335 else:
336 tail_part = ''
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800337
338 splitpath = ('/' + '/'.join(head_parts), tail_part)
339 collapsed_path = "/".join(splitpath)
340
341 return collapsed_path
Gregory P. Smith923ba362009-04-06 06:33:26 +0000342
343
# Cached uid for user "nobody"; filled in by the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached module-level value when it is already set to a
    true value.  Returns -1 when the pwd module cannot be imported.
    Otherwise looks up the 'nobody' account; if there is no such account,
    uses one more than the largest uid in the password database.  The
    result of the lookup is cached in the global `nobody`.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # Index 2 of a password database entry is the uid.
            nobody = max(entry[2] for entry in pwd.getpwall()) + 1
    return nobody
360
361
def executable(path):
    """Test for executable file.

    Returns True when any of the owner/group/other execute permission
    bits is set on path, and False when none is set or the file cannot
    be stat'ed.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    # 0o111 masks the execute bits for user, group and other.
    return bool(mode & 0o111)
Guido van Rossume7e578f1995-08-04 04:00:20 +0000369
370
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run SimpleHTTPServer.test() with the CGI-capable handler.

    Both arguments are passed through unchanged to SimpleHTTPServer.test().
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


# Start the test server when the module is executed as a script.
if __name__ == '__main__':
    test()