blob: 47a994cab1fe90850dcecf79fd4a2ccbe46ba1f2 [file] [log] [blame]
Guido van Rossume7e578f1995-08-04 04:00:20 +00001"""CGI-savvy HTTP Server.
2
3This module builds on SimpleHTTPServer by implementing GET and POST
4requests to cgi-bin scripts.
5
Guido van Rossume7d6b0a2000-09-19 04:01:01 +00006If the os.fork() function is not present (e.g. on Windows),
7os.popen2() is used as a fallback, with slightly altered semantics; if
8that function is not present either (e.g. on Macintosh), only Python
9scripts are supported, and they are executed by the current process.
10
11In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.
13
14SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
15-- it may execute arbitrary Python code or external programs.
Fred Drake40e84db1999-10-16 02:07:50 +000016
Jeremy Hylton6414cd82004-12-22 14:19:09 +000017Note that status code 200 is sent prior to execution of a CGI script, so
18scripts cannot send other status codes such as 302 (redirect).
Guido van Rossume7e578f1995-08-04 04:00:20 +000019"""
20
21
# Version string of this module.
__version__ = "0.4"

# Names exported by a star-import of this module.
__all__ = ["CGIHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000025
26import os
Guido van Rossume7d6b0a2000-09-19 04:01:01 +000027import sys
Guido van Rossume7e578f1995-08-04 04:00:20 +000028import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
Steve Holden8a978f72003-01-08 18:53:18 +000031import select
Senthil Kumarana9bd0cc2010-10-03 18:16:52 +000032import copy
Guido van Rossume7e578f1995-08-04 04:00:20 +000033
34
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL directories (after path collapsing) that are treated as
    # containing CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).
        """
        collapsed_path = _url_collapse_path(self.path)
        # The collapsed path always has the form '/<head>/<tail>', so the
        # first '/' after position 0 separates the top directory from the rest.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, builds
        the CGI environment from the request, sends a 200 status line and
        then runs the script: via fork/execve where os.fork exists,
        otherwise via subprocess.  Error responses (404/403) are sent when
        the script is missing, is not a plain file, or cannot be run.
        """
        cgi_dir, rest = self.cgi_info
        # Rebuild the path from the collapsed components that is_cgi()
        # validated instead of re-reading the raw self.path.  Walking the
        # raw path lets requests such as
        # '///////////badscript.sh/../cgi-bin/cgi.sh' pass is_cgi() and
        # then resolve to a script outside the CGI directories.
        path = cgi_dir + '/' + rest
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir) + 1)
            else:
                break

        # find an explicit query string, if present.  The query starts at
        # the FIRST '?'; later '?' characters belong to the query itself.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Skip the leading 'Accept:' (7 characters).
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody_id = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody_id)
                except os.error:
                    # Best effort: keep the current uid if we may not
                    # change it.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # fall back into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
Guido van Rossume7e578f1995-08-04 04:00:20 +0000301
302
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800303def _url_collapse_path(path):
Gregory P. Smith923ba362009-04-06 06:33:26 +0000304 """
305 Given a URL path, remove extra '/'s and '.' path elements and collapse
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800306 any '..' references and returns a colllapsed path.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000307
308 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800309 The utility of this function is limited to is_cgi method and helps
310 preventing some security attacks.
Gregory P. Smith923ba362009-04-06 06:33:26 +0000311
312 Returns: A tuple of (head, tail) where tail is everything after the final /
313 and head is everything before it. Head will always start with a '/' and,
314 if it contains anything else, never have a trailing '/'.
315
316 Raises: IndexError if too many '..' occur within the path.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800317
Gregory P. Smith923ba362009-04-06 06:33:26 +0000318 """
319 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
320 # path semantics rather than local operating system semantics.
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800321 path_parts = path.split('/')
322 head_parts = []
323 for part in path_parts[:-1]:
324 if part == '..':
325 head_parts.pop() # IndexError if more '..' than prior parts
326 elif part and part != '.':
327 head_parts.append( part )
Gregory P. Smith923ba362009-04-06 06:33:26 +0000328 if path_parts:
Senthil Kumaranfb2e8742012-04-11 03:07:57 +0800329 tail_part = path_parts.pop()
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800330 if tail_part:
331 if tail_part == '..':
332 head_parts.pop()
333 tail_part = ''
334 elif tail_part == '.':
335 tail_part = ''
Gregory P. Smith923ba362009-04-06 06:33:26 +0000336 else:
337 tail_part = ''
Senthil Kumaran5f7e7342012-04-12 02:23:23 +0800338
339 splitpath = ('/' + '/'.join(head_parts), tail_part)
340 collapsed_path = "/".join(splitpath)
341
342 return collapsed_path
Gregory P. Smith923ba362009-04-06 06:33:26 +0000343
344
# Cached uid returned by nobody_uid(); None until the first lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    The result is cached in the module-level ``nobody``.  Returns -1
    (without caching) when the pwd module cannot be imported.  When no
    'nobody' account exists, one more than the largest uid found in the
    password database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
361
362
def executable(path):
    """Test for executable file.

    Returns True when os.stat() succeeds on path and any of the
    user/group/other execute permission bits is set; returns False
    otherwise, including when path cannot be stat'ed.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    # 0o111 == execute bit for user, group and other.
    return bool(mode & 0o111)
Guido van Rossume7e578f1995-08-04 04:00:20 +0000370
371
def test(HandlerClass=CGIHTTPRequestHandler,
         ServerClass=BaseHTTPServer.HTTPServer):
    """Run a test server by delegating to SimpleHTTPServer.test().

    HandlerClass and ServerClass are passed through unchanged.
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


# Start the test server when this file is executed as a script.
if __name__ == '__main__':
    test()