blob: 4f41a19aa5c8fee4b0bbe2cfa01a6df01e0724be [file] [log] [blame]
Georg Brandl24420152008-05-26 16:32:26 +00001"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET, HEAD and POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16os.popen2() is used as a fallback, with slightly altered semantics; if
17that function is not present either (e.g. on Macintosh), only Python
18scripts are supported, and they are executed by the current process.
19
20In all cases, the implementation is intentionally naive -- all
21requests are executed synchronously.
22
23SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
24-- it may execute arbitrary Python code or external programs.
25
26Note that status code 200 is sent prior to execution of a CGI script, so
27scripts cannot send other status codes such as 302 (redirect).
28
29XXX To do:
30
31- log requests even later (to capture byte count)
32- log user-agent header and other interesting goodies
33- send error log to separate file
34"""
35
36
37# See also:
38#
39# HTTP Working Group T. Berners-Lee
40# INTERNET-DRAFT R. T. Fielding
41# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
42# Expires September 8, 1995 March 8, 1995
43#
44# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
45#
46# and
47#
48# Network Working Group R. Fielding
49# Request for Comments: 2616 et al
50# Obsoletes: 2068 June 1999
51# Category: Standards Track
52#
53# URL: http://www.faqs.org/rfcs/rfc2616.html
54
55# Log files
56# ---------
57#
58# Here's a quote from the NCSA httpd docs about log file format.
59#
60# | The logfile format is as follows. Each line consists of:
61# |
62# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
63# |
64# | host: Either the DNS name or the IP number of the remote client
65# | rfc931: Any information returned by identd for this person,
66# | - otherwise.
67# | authuser: If user sent a userid for authentication, the user name,
68# | - otherwise.
69# | DD: Day
70# | Mon: Month (calendar name)
71# | YYYY: Year
72# | hh: hour (24-hour format, the machine's timezone)
73# | mm: minutes
74# | ss: seconds
75# | request: The first line of the HTTP request as sent by the client.
76# | ddd: the status code returned by the server, - if not available.
77# | bbbb: the total number of bytes sent,
78# | *not including the HTTP/1.0 header*, - if not available
79# |
80# | You can determine the name of the file accessed through request.
81#
82# (Actually, the latter is only true if you know the server configuration
83# at the time the request was made!)
84
__version__ = "0.6"

# Public API of this module.  The module docstring presents
# SimpleHTTPRequestHandler and CGIHTTPRequestHandler as the classes users
# are expected to pick up, so they are exported alongside the server and
# the base handler.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
88
89import io
90import os
91import sys
92import cgi
93import time
94import socket # For gethostbyaddr()
95import shutil
96import urllib
97import select
98import mimetools
99import mimetypes
100import posixpath
101import socketserver
102
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills this in with a mapping that
# provides the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent together with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
117
118def _quote_html(html):
119 return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
120
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers the name and port it was bound to."""

    # Allow quick restarts on the same address (handy while testing).
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record server_name and server_port.

        server_name is the fully qualified name reported by
        socket.getfqdn() for the bound host; server_port is the bound
        port number.
        """
        socketserver.TCPServer.server_bind(self)
        bound = self.socket.getsockname()
        # Only the first two items (host, port) matter; IPv6 addresses
        # carry extra fields after them.
        name, number = bound[:2]
        self.server_port = number
        self.server_name = socket.getfqdn(name)
131
132
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline receives the decoded request
        line without its line terminator, and self.close_connection is
        updated from the request version and the Connection header.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a completely blank request
        line, which is dropped silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9 style request, GET only.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank request line: nothing to answer.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.

        # MessageClass (rfc822) wants to see strings rather than bytes.
        # But a TextIOWrapper around self.rfile would buffer too many bytes
        # from the stream, bytes which we later need to read as bytes.
        # So we read the correct bytes here, as bytes, then use StringIO
        # to make them look like strings for MessageClass to parse.
        headers = []
        while True:
            line = self.rfile.readline()
            headers.append(line)
            # A blank line ends the headers; b'' means the peer closed.
            if line in (b'\r\n', b'\n', b''):
                break
        hfile = io.StringIO(b''.join(headers).decode('iso-8859-1'))
        self.headers = self.MessageClass(hfile)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        Reads the request line, parses the request and dispatches to
        the do_<command>() method; a 501 error is sent when no such
        method exists.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # End of stream: the client is gone, stop the handle() loop.
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML
        body is omitted for HEAD requests and for codes below 200,
        204 and 304.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        MESSAGE defaults to the short message registered for CODE in
        self.responses, or to the empty string for unknown codes.
        Nothing is written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            # Encode as Latin-1 rather than ASCII: error messages embed
            # the repr of request data decoded from ISO-8859-1, so a
            # non-ASCII byte sent by the client would otherwise raise
            # UnicodeEncodeError while the error is being reported.
            self.wfile.write(("%s %d %s\r\n" %
                              (self.protocol_version, code, message)).encode('latin-1', 'strict'))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A Connection header
        with the value 'close' or 'keep-alive' also updates
        self.close_connection accordingly.

        """
        if self.request_version != 'HTTP/0.9':
            # Latin-1 for the same reason as the status line in
            # send_response(): ASCII would reject any byte >= 0x80.
            self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(b"\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        TIMESTAMP is a time in seconds since the epoch; it defaults to
        the current time.  The result is expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the tm_wday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the tm_mon field (1..12); slot 0 is a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the name it reports.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
598
599
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the copy fails, e.g. because
            # the client dropped the connection mid-transfer.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        A directory requested without a trailing slash is answered
        with a 301 redirect; a directory containing index.html or
        index.htm serves that file; any other directory is answered
        by list_directory().

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file object on this path,
            # so it must be closed here before propagating the error.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = cgi.escape(urllib.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
                    % (urllib.quote(linkname), cgi.escape(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is rooted at the current working directory.

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        path = posixpath.normpath(urllib.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Keep only the last plain component of each word so that
            # drive letters and embedded separators cannot escape the
            # served directory.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then lower-cased.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
784
785
786# Utilities for CGIHTTPRequestHandler
787
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Return the uid to use for the unprivileged 'nobody' user.

    The value is looked up once and cached in the module-level
    variable ``nobody``.  When the pwd module cannot be imported, -1
    is returned (and nothing is cached).  When there is no 'nobody'
    account, one more than the highest uid in the password database
    is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
804
805
def executable(path):
    """Return True if PATH exists and has any execute permission bit set.

    A path that cannot be stat()ed yields False.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    # 0o111 selects the execute bits for user, group and other.
    return bool(mode & 0o111)
813
814
815class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
816
817 """Complete HTTP server with GET, HEAD and POST commands.
818
819 GET and HEAD also support running CGI scripts.
820
821 The POST command is *only* implemented for CGI scripts.
822
823 """
824
825 # Determine platform specifics
826 have_fork = hasattr(os, 'fork')
827 have_popen2 = hasattr(os, 'popen2')
828 have_popen3 = hasattr(os, 'popen3')
829
830 # Make rfile unbuffered -- we need to read one line and then pass
831 # the rest to a subprocess, so we can't use buffered input.
832 rbufsize = 0
833
def do_POST(self):
    """Serve a POST request.

    POST is only supported for CGI scripts: when the request path is
    not recognised by is_cgi(), a 501 error is sent instead.

    """
    if not self.is_cgi():
        self.send_error(501, "Can only POST to CGI scripts")
        return
    self.run_cgi()
845
def send_head(self):
    """Version of send_head that supports CGI scripts.

    CGI paths are handed to run_cgi(); everything else is served by
    SimpleHTTPRequestHandler.send_head().
    """
    if not self.is_cgi():
        return SimpleHTTPRequestHandler.send_head(self)
    return self.run_cgi()
852
def is_cgi(self):
    """Test whether self.path corresponds to a CGI script.

    Return True if self.path requires running a CGI script, False
    if not.  On success the pair (dir, rest) is stored in
    self.cgi_info, where dir is the matching CGI directory and rest
    is whatever follows the slash after it (possibly empty).

    The default implementation tests whether the path begins with
    one of the strings in the list self.cgi_directories (and the
    next character is a '/' or the end of the string).

    """
    requested = self.path
    for prefix in self.cgi_directories:
        cut = len(prefix)
        if requested[:cut] != prefix:
            continue
        remainder = requested[cut:]
        # The prefix must be a whole path component, not just the
        # beginning of a longer name.
        if remainder and remainder[0] != '/':
            continue
        self.cgi_info = requested[:cut], requested[cut+1:]
        return True
    return False
875
876 cgi_directories = ['/cgi-bin', '/htbin']
877
def is_executable(self, path):
    """Test whether argument path is an executable file.

    Delegates to the module-level executable() helper.
    """
    verdict = executable(path)
    return verdict
881
def is_python(self, path):
    """Test whether argument path is a Python script.

    The decision is based only on the file extension (.py or .pyw,
    compared case-insensitively).
    """
    _, extension = os.path.splitext(path)
    suffix = extension.lower()
    return suffix in (".py", ".pyw")
886
887 def run_cgi(self):
888 """Execute a CGI script."""
889 path = self.path
890 dir, rest = self.cgi_info
891
892 i = path.find('/', len(dir) + 1)
893 while i >= 0:
894 nextdir = path[:i]
895 nextrest = path[i+1:]
896
897 scriptdir = self.translate_path(nextdir)
898 if os.path.isdir(scriptdir):
899 dir, rest = nextdir, nextrest
900 i = path.find('/', len(dir) + 1)
901 else:
902 break
903
904 # find an explicit query string, if present.
905 i = rest.rfind('?')
906 if i >= 0:
907 rest, query = rest[:i], rest[i+1:]
908 else:
909 query = ''
910
911 # dissect the part after the directory name into a script name &
912 # a possible additional path, to be stored in PATH_INFO.
913 i = rest.find('/')
914 if i >= 0:
915 script, rest = rest[:i], rest[i:]
916 else:
917 script, rest = rest, ''
918
919 scriptname = dir + '/' + script
920 scriptfile = self.translate_path(scriptname)
921 if not os.path.exists(scriptfile):
922 self.send_error(404, "No such CGI script (%r)" % scriptname)
923 return
924 if not os.path.isfile(scriptfile):
925 self.send_error(403, "CGI script is not a plain file (%r)" %
926 scriptname)
927 return
928 ispy = self.is_python(scriptname)
929 if not ispy:
930 if not (self.have_fork or self.have_popen2 or self.have_popen3):
931 self.send_error(403, "CGI script is not a Python script (%r)" %
932 scriptname)
933 return
934 if not self.is_executable(scriptfile):
935 self.send_error(403, "CGI script is not executable (%r)" %
936 scriptname)
937 return
938
939 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
940 # XXX Much of the following could be prepared ahead of time!
941 env = {}
942 env['SERVER_SOFTWARE'] = self.version_string()
943 env['SERVER_NAME'] = self.server.server_name
944 env['GATEWAY_INTERFACE'] = 'CGI/1.1'
945 env['SERVER_PROTOCOL'] = self.protocol_version
946 env['SERVER_PORT'] = str(self.server.server_port)
947 env['REQUEST_METHOD'] = self.command
948 uqrest = urllib.unquote(rest)
949 env['PATH_INFO'] = uqrest
950 env['PATH_TRANSLATED'] = self.translate_path(uqrest)
951 env['SCRIPT_NAME'] = scriptname
952 if query:
953 env['QUERY_STRING'] = query
954 host = self.address_string()
955 if host != self.client_address[0]:
956 env['REMOTE_HOST'] = host
957 env['REMOTE_ADDR'] = self.client_address[0]
958 authorization = self.headers.getheader("authorization")
959 if authorization:
960 authorization = authorization.split()
961 if len(authorization) == 2:
962 import base64, binascii
963 env['AUTH_TYPE'] = authorization[0]
964 if authorization[0].lower() == "basic":
965 try:
966 authorization = authorization[1].encode('ascii')
967 authorization = base64.decodestring(authorization).\
968 decode('ascii')
969 except (binascii.Error, UnicodeError):
970 pass
971 else:
972 authorization = authorization.split(':')
973 if len(authorization) == 2:
974 env['REMOTE_USER'] = authorization[0]
975 # XXX REMOTE_IDENT
976 if self.headers.typeheader is None:
977 env['CONTENT_TYPE'] = self.headers.type
978 else:
979 env['CONTENT_TYPE'] = self.headers.typeheader
980 length = self.headers.getheader('content-length')
981 if length:
982 env['CONTENT_LENGTH'] = length
983 referer = self.headers.getheader('referer')
984 if referer:
985 env['HTTP_REFERER'] = referer
986 accept = []
987 for line in self.headers.getallmatchingheaders('accept'):
988 if line[:1] in "\t\n\r ":
989 accept.append(line.strip())
990 else:
991 accept = accept + line[7:].split(',')
992 env['HTTP_ACCEPT'] = ','.join(accept)
993 ua = self.headers.getheader('user-agent')
994 if ua:
995 env['HTTP_USER_AGENT'] = ua
996 co = filter(None, self.headers.getheaders('cookie'))
997 if co:
998 env['HTTP_COOKIE'] = ', '.join(co)
999 # XXX Other HTTP_* headers
1000 # Since we're setting the env in the parent, provide empty
1001 # values to override previously set values
1002 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
1003 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
1004 env.setdefault(k, "")
1005 os.environ.update(env)
1006
1007 self.send_response(200, "Script output follows")
1008
1009 decoded_query = query.replace('+', ' ')
1010
1011 if self.have_fork:
1012 # Unix -- fork as we should
1013 args = [script]
1014 if '=' not in decoded_query:
1015 args.append(decoded_query)
1016 nobody = nobody_uid()
1017 self.wfile.flush() # Always flush before forking
1018 pid = os.fork()
1019 if pid != 0:
1020 # Parent
1021 pid, sts = os.waitpid(pid, 0)
1022 # throw away additional data [see bug #427345]
1023 while select.select([self.rfile], [], [], 0)[0]:
1024 if not self.rfile.read(1):
1025 break
1026 if sts:
1027 self.log_error("CGI script exit status %#x", sts)
1028 return
1029 # Child
1030 try:
1031 try:
1032 os.setuid(nobody)
1033 except os.error:
1034 pass
1035 os.dup2(self.rfile.fileno(), 0)
1036 os.dup2(self.wfile.fileno(), 1)
1037 os.execve(scriptfile, args, os.environ)
1038 except:
1039 self.server.handle_error(self.request, self.client_address)
1040 os._exit(127)
1041
1042 elif self.have_popen2 or self.have_popen3:
1043 # Windows -- use popen2 or popen3 to create a subprocess
1044 import shutil
1045 if self.have_popen3:
1046 popenx = os.popen3
1047 else:
1048 popenx = os.popen2
1049 cmdline = scriptfile
1050 if self.is_python(scriptfile):
1051 interp = sys.executable
1052 if interp.lower().endswith("w.exe"):
1053 # On Windows, use python.exe, not pythonw.exe
1054 interp = interp[:-5] + interp[-4:]
1055 cmdline = "%s -u %s" % (interp, cmdline)
1056 if '=' not in query and '"' not in query:
1057 cmdline = '%s "%s"' % (cmdline, query)
1058 self.log_message("command: %s", cmdline)
1059 try:
1060 nbytes = int(length)
1061 except (TypeError, ValueError):
1062 nbytes = 0
1063 files = popenx(cmdline, 'b')
1064 fi = files[0]
1065 fo = files[1]
1066 if self.have_popen3:
1067 fe = files[2]
1068 if self.command.lower() == "post" and nbytes > 0:
1069 data = self.rfile.read(nbytes)
1070 fi.write(data)
1071 # throw away additional data [see bug #427345]
1072 while select.select([self.rfile._sock], [], [], 0)[0]:
1073 if not self.rfile._sock.recv(1):
1074 break
1075 fi.close()
1076 shutil.copyfileobj(fo, self.wfile)
1077 if self.have_popen3:
1078 errors = fe.read()
1079 fe.close()
1080 if errors:
1081 self.log_error('%s', errors)
1082 sts = fo.close()
1083 if sts:
1084 self.log_error("CGI script exit status %#x", sts)
1085 else:
1086 self.log_message("CGI script exited OK")
1087
1088 else:
1089 # Other O.S. -- execute script in this process
1090 save_argv = sys.argv
1091 save_stdin = sys.stdin
1092 save_stdout = sys.stdout
1093 save_stderr = sys.stderr
1094 try:
1095 save_cwd = os.getcwd()
1096 try:
1097 sys.argv = [scriptfile]
1098 if '=' not in decoded_query:
1099 sys.argv.append(decoded_query)
1100 sys.stdout = self.wfile
1101 sys.stdin = self.rfile
1102 exec(open(scriptfile).read(), {"__name__": "__main__"})
1103 finally:
1104 sys.argv = save_argv
1105 sys.stdin = save_stdin
1106 sys.stdout = save_stdout
1107 sys.stderr = save_stderr
1108 os.chdir(save_cwd)
1109 except SystemExit as sts:
1110 self.log_error("CGI script exit status %s", str(sts))
1111 else:
1112 self.log_message("CGI script exited OK")
1113
1114
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run a simple HTTP server for trying out a request handler class.

    The listening port is taken from the first command line argument
    when one is given, and defaults to 8000 otherwise.  The server is
    bound to ('', port) and serves requests until interrupted.

    HandlerClass -- request handler class; its protocol_version class
                    attribute is overwritten with *protocol*.
    ServerClass  -- server class, instantiated as
                    ServerClass(server_address, HandlerClass).
    protocol     -- HTTP version string assigned to the handler class.

    """
    cli_args = sys.argv[1:]
    # int() raises ValueError for a non-numeric port argument; that
    # happens before the handler class is touched.
    port = int(cli_args[0]) if cli_args else 8000

    # Note: this mutates the handler *class*, not an instance.
    HandlerClass.protocol_version = protocol
    server = ServerClass(('', port), HandlerClass)

    # Report the address the socket is actually bound to.
    bound = server.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    server.serve_forever()
1136
1137
if __name__ == '__main__':
    # test() ends in httpd.serve_forever(), so it never returns normally:
    # only one call can ever run here, and any further calls placed after
    # it would be dead code.  BaseHTTPRequestHandler implements no HTTP
    # request at all (see the module docstring), so it is not a useful
    # handler for a stand-alone server; use SimpleHTTPRequestHandler,
    # which the module docstring names as the simple GET/HEAD
    # implementation.
    test(HandlerClass=SimpleHTTPRequestHandler)