blob: 5726017b38c93a858679a62dbc00e87c1d037daa [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (which also handles POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module; the handler classes append it to their
# server_version strings.
__version__ = "0.6"

# Public API.  The request handler subclasses defined in this module are
# meant to be used directly, so export them alongside the server and the
# base handler.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
87import io
88import os
89import sys
90import cgi
91import time
92import socket # For gethostbyaddr()
93import shutil
Jeremy Hylton1afc1692008-06-18 20:49:58 +000094import urllib.parse
Georg Brandl24420152008-05-26 16:32:26 +000095import select
Georg Brandl24420152008-05-26 16:32:26 +000096import mimetypes
97import posixpath
98import socketserver
Barry Warsaw820c1202008-06-12 04:06:45 +000099import email.message
100import email.parser
Georg Brandl24420152008-05-26 16:32:26 +0000101
# Default error message template.
# It is %-formatted with a mapping that must provide the keys 'code',
# 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Default value for the Content-Type header of error responses.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
116
117def _quote_html(html):
118 return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
119
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        bound_host, bound_port = bound_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
130
131
132class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
133
134 """HTTP request handler base class.
135
136 The following explanation of HTTP serves to guide you through the
137 code as well as to expose any misunderstandings I may have about
138 HTTP (so you don't need to read the code to figure out I'm wrong
139 :-).
140
141 HTTP (HyperText Transfer Protocol) is an extensible protocol on
142 top of a reliable stream transport (e.g. TCP/IP). The protocol
143 recognizes three parts to a request:
144
145 1. One line identifying the request type and path
146 2. An optional set of RFC-822-style headers
147 3. An optional data part
148
149 The headers and data are separated by a blank line.
150
151 The first line of the request has the form
152
153 <command> <path> <version>
154
155 where <command> is a (case-sensitive) keyword such as GET or POST,
156 <path> is a string containing path information for the request,
157 and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
158 <path> is encoded using the URL encoding scheme (using %xx to signify
159 the ASCII character with hex code xx).
160
161 The specification specifies that lines are separated by CRLF but
162 for compatibility with the widest range of clients recommends
163 servers also handle LF. Similarly, whitespace in the request line
164 is treated sensibly (allowing multiple spaces between components
165 and allowing trailing whitespace).
166
167 Similarly, for output, lines ought to be separated by CRLF pairs
168 but most clients grok LF characters just fine.
169
170 If the first line of the request has the form
171
172 <command> <path>
173
174 (i.e. <version> is left out) then this is assumed to be an HTTP
175 0.9 request; this form has no optional headers and data part and
176 the reply consists of just the data.
177
178 The reply form of the HTTP 1.x protocol again has three parts:
179
180 1. One line giving the response code
181 2. An optional set of RFC-822-style headers
182 3. The data
183
184 Again, the headers and data are separated by a blank line.
185
186 The response code line has the form
187
188 <version> <responsecode> <responsestring>
189
190 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
191 <responsecode> is a 3-digit response code indicating success or
192 failure of the request, and <responsestring> is an optional
193 human-readable string explaining what the response code means.
194
195 This server parses the request and the headers, and then calls a
196 function specific to the request type (<command>). Specifically,
197 a request SPAM will be handled by a method do_SPAM(). If no
198 such method exists the server sends an error response to the
199 client. If it exists, it is called with no arguments:
200
201 do_SPAM()
202
203 Note that the request name is case sensitive (i.e. SPAM and spam
204 are different requests).
205
206 The various request details are stored in instance variables:
207
208 - client_address is the client IP address in the form (host,
209 port);
210
211 - command, path and version are the broken-down request line;
212
Barry Warsaw820c1202008-06-12 04:06:45 +0000213 - headers is an instance of email.message.Message (or a derived
Georg Brandl24420152008-05-26 16:32:26 +0000214 class) containing the header information;
215
216 - rfile is a file object open for reading positioned at the
217 start of the optional input data part;
218
219 - wfile is a file object open for writing.
220
221 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
222
223 The first thing to be written must be the response line. Then
224 follow 0 or more header lines, then a blank line, and then the
225 actual data (if any). The meaning of the header lines depends on
226 the command executed by the server; in most cases, when data is
227 returned, there should be at least one header line of the form
228
229 Content-type: <type>/<subtype>
230
231 where <type> and <subtype> should be registered MIME types,
232 e.g. "text/html" or "text/plain".
233
234 """
235
# The Python system version, truncated to its first component:
# "Python/" followed by the first whitespace-separated token of sys.version.
sys_version = "Python/%s" % sys.version.split()[0]

# The server software version.  You may want to override this.
# The format is multiple whitespace-separated strings,
# where each string is of the form name[/version].
server_version = "BaseHTTP/%s" % __version__

# Template and Content-Type used by send_error() for the error page.
error_message_format = DEFAULT_ERROR_MESSAGE
error_content_type = DEFAULT_ERROR_CONTENT_TYPE

# The default request version.  This only affects responses up until
# the point where the request line is parsed, so it mainly decides what
# the client gets back when sending a malformed request line.
# Most web servers default to HTTP 0.9, i.e. don't send a status line.
default_request_version = "HTTP/0.9"
252
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.

    Return True for success, False for failure; on failure, an
    error is sent back.  The one exception is a request line that
    contains only whitespace: it yields False without any reply.

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = 1
    requestline = str(self.raw_requestline, 'iso-8859-1')
    if requestline[-2:] == '\r\n':
        requestline = requestline[:-2]
    elif requestline[-1:] == '\n':
        requestline = requestline[:-1]
    self.requestline = requestline
    words = requestline.split()
    if len(words) == 3:
        [command, path, version] = words
        if version[:5] != 'HTTP/':
            self.send_error(400, "Bad request version (%r)" % version)
            return False
        try:
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            # int() is more lenient than the HTTP grammar: it accepts a
            # leading sign ("HTTP/-1.1", "HTTP/+1.1") and, on newer
            # Pythons, underscores.  Only plain digits are valid here.
            if not all(part.isdigit() for part in version_number):
                raise ValueError
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(400, "Bad request version (%r)" % version)
            return False
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = 0
        if version_number >= (2, 0):
            self.send_error(505,
                      "Invalid HTTP Version (%s)" % base_version_number)
            return False
    elif len(words) == 2:
        # No version given: treat as an HTTP/0.9 request, which only
        # knows GET and never keeps the connection open.
        [command, path] = words
        self.close_connection = 1
        if command != 'GET':
            self.send_error(400,
                            "Bad HTTP/0.9 request type (%r)" % command)
            return False
    elif not words:
        return False
    else:
        self.send_error(400, "Bad request syntax (%r)" % requestline)
        return False
    self.command, self.path, self.request_version = command, path, version

    # Examine the headers and look for a Connection directive.

    # MessageClass wants to see strings rather than bytes.
    # But a TextIOWrapper around self.rfile would buffer too many bytes
    # from the stream, bytes which we later need to read as bytes.
    # So we read the correct bytes here, as bytes, then use StringIO
    # to make them look like strings for MessageClass to parse.
    headers = []
    while True:
        line = self.rfile.readline()
        headers.append(line)
        # A blank line ends the header section; b'' means EOF.
        if line in (b'\r\n', b'\n', b''):
            break
    hfile = io.StringIO(b''.join(headers).decode('iso-8859-1'))
    self.headers = email.parser.Parser(_class=self.MessageClass).parse(hfile)

    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = 1
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = 0
    return True
337
def handle_one_request(self):
    """Read one request line and dispatch it to the matching do_* method.

    You normally don't need to override this method; see the class
    __doc__ string for information on how to handle specific HTTP
    commands such as GET and POST.

    """
    line = self.rfile.readline()
    self.raw_requestline = line
    if not line:
        # Nothing could be read: stop serving this connection.
        self.close_connection = 1
        return
    if not self.parse_request():
        # An error code has been sent, just exit
        return
    mname = 'do_' + self.command
    if hasattr(self, mname):
        getattr(self, mname)()
    else:
        self.send_error(501, "Unsupported method (%r)" % self.command)
358
def handle(self):
    """Serve requests on this connection until close_connection is set."""
    self.close_connection = 1

    # At least one request is always handled; handle_one_request() may
    # clear close_connection to ask for another round.
    while True:
        self.handle_one_request()
        if self.close_connection:
            break
366
def send_error(self, code, message=None):
    """Send and log an error reply.

    Arguments are the error code, and a detailed message.
    The detailed message defaults to the short entry matching the
    response code.

    This sends an error response (so it must be called before any
    output has been generated), logs the error, and finally sends
    a piece of HTML explaining the error to the user.  The HTML body
    is left out for HEAD requests and for status codes that must not
    carry one (1xx, 204, 205 and 304).

    """

    try:
        shortmsg, longmsg = self.responses[code]
    except KeyError:
        shortmsg, longmsg = '???', '???'
    if message is None:
        message = shortmsg
    explain = longmsg
    self.log_error("code %d, message %s", code, message)
    # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
    content = (self.error_message_format %
               {'code': code, 'message': _quote_html(message), 'explain': explain})
    body = None
    if code >= 200 and code not in (204, 205, 304):
        body = content.encode('UTF-8', 'replace')
    self.send_response(code, message)
    self.send_header("Content-Type", self.error_content_type)
    if body is not None:
        # Announce the size of the error page so clients need not rely
        # on the connection being closed to find its end.
        self.send_header("Content-Length", str(len(body)))
    self.send_header('Connection', 'close')
    self.end_headers()
    if self.command != 'HEAD' and body is not None:
        self.wfile.write(body)
397
def send_response(self, code, message=None):
    """Log the response code, then send the status line and base headers.

    When no message is given, the short message registered for the
    code in self.responses is used (or an empty string if there is
    none).  The Server and Date headers are sent right after the
    status line.

    """
    self.log_request(code)
    if message is None:
        try:
            entry = self.responses[code]
        except KeyError:
            message = ''
        else:
            message = entry[0]
    if self.request_version != 'HTTP/0.9':
        status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
        self.wfile.write(status_line.encode('ASCII', 'strict'))
    self.send_header('Server', self.version_string())
    self.send_header('Date', self.date_time_string())
417
def send_header(self, keyword, value):
    """Send one MIME header and track Connection directives.

    Nothing is written for HTTP/0.9 requests, but a Connection header
    still updates self.close_connection.

    """
    if self.request_version != 'HTTP/0.9':
        header_line = "%s: %s\r\n" % (keyword, value)
        self.wfile.write(header_line.encode('ASCII', 'strict'))

    if keyword.lower() != 'connection':
        return
    directive = value.lower()
    if directive == 'close':
        self.close_connection = 1
    elif directive == 'keep-alive':
        self.close_connection = 0
428
def end_headers(self):
    """Send the blank line ending the MIME headers (not for HTTP/0.9)."""
    if self.request_version == 'HTTP/0.9':
        return
    self.wfile.write(b"\r\n")
433
def log_request(self, code='-', size='-'):
    """Log an accepted request: the request line, status code and size.

    This is called by send_response().

    """
    status, length = str(code), str(size)
    self.log_message('"%s" %s %s', self.requestline, status, length)
443
def log_error(self, format, *args):
    """Log an error for a request that cannot be fulfilled.

    The default implementation simply forwards its arguments,
    unchanged, to log_message().

    XXX This should go to the separate error log.

    """
    self.log_message(format, *args)
457
def log_message(self, format, *args):
    """Write one log line to sys.stderr.

    This is used by all other logging functions.  Override
    it if you have specific logging wishes.

    FORMAT is a %-style format string and ARGS are the values it
    consumes (just like printf).  The client host and the current
    date/time are prefixed to every message.

    """
    client = self.address_string()
    stamp = self.log_date_time_string()
    text = format % args
    sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
479
def version_string(self):
    """Return server_version and sys_version joined by a single space."""
    return ' '.join((self.server_version, self.sys_version))
483
def date_time_string(self, timestamp=None):
    """Return a GMT date string for a message header.

    TIMESTAMP is in seconds since the epoch; the current time is
    used when it is None.

    """
    if timestamp is None:
        timestamp = time.time()
    utc = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        self.weekdayname[utc.tm_wday],
        utc.tm_mday, self.monthname[utc.tm_mon], utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
494
def log_date_time_string(self):
    """Return the current local time as 'DD/Mon/YYYY hh:mm:ss'."""
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
502
# Abbreviated day names, indexed by the tm_wday field (0 is Monday).
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()

# Abbreviated month names, indexed by month number; slot 0 is a
# placeholder so that 1 maps to January.
monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
508
def address_string(self):
    """Return the client address formatted for logging.

    The host part of self.client_address is handed to
    socket.getfqdn() and whatever name that returns is used.

    """
    host, _port = self.client_address[:2]
    return socket.getfqdn(host)
519
# Essentially static class variables

# The version of the HTTP protocol we support.
# Set this to HTTP/1.1 to enable automatic keepalive
# (parse_request() only keeps a connection open when this compares
# >= "HTTP/1.1").
protocol_version = "HTTP/1.0"

# MessageClass used to parse headers; parse_request() passes it to
# email.parser.Parser as the message factory.
# NOTE(review): this import sits among the class-level statements, so it
# presumably also binds `http` as a class attribute -- confirm before moving.
import http.client
MessageClass = http.client.HTTPMessage
Georg Brandl24420152008-05-26 16:32:26 +0000529
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
# See RFC 2616.
# send_response() uses the short message as the default reason phrase;
# send_error() uses the long message as the explanation in the error page.
responses = {
    # 1xx
    100: ('Continue', 'Request received, please continue'),
    101: ('Switching Protocols',
          'Switching to new protocol; obey Upgrade header'),

    # 2xx
    200: ('OK', 'Request fulfilled, document follows'),
    201: ('Created', 'Document created, URL follows'),
    202: ('Accepted',
          'Request accepted, processing continues off-line'),
    203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
    204: ('No Content', 'Request fulfilled, nothing follows'),
    205: ('Reset Content', 'Clear input form for further input.'),
    206: ('Partial Content', 'Partial content follows.'),

    # 3xx
    300: ('Multiple Choices',
          'Object has several resources -- see URI list'),
    301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
    302: ('Found', 'Object moved temporarily -- see URI list'),
    303: ('See Other', 'Object moved -- see Method and URL list'),
    304: ('Not Modified',
          'Document has not changed since given time'),
    305: ('Use Proxy',
          'You must use proxy specified in Location to access this '
          'resource.'),
    307: ('Temporary Redirect',
          'Object moved temporarily -- see URI list'),

    # 4xx
    400: ('Bad Request',
          'Bad request syntax or unsupported method'),
    401: ('Unauthorized',
          'No permission -- see authorization schemes'),
    402: ('Payment Required',
          'No payment -- see charging schemes'),
    403: ('Forbidden',
          'Request forbidden -- authorization will not help'),
    404: ('Not Found', 'Nothing matches the given URI'),
    405: ('Method Not Allowed',
          'Specified method is invalid for this server.'),
    406: ('Not Acceptable', 'URI not available in preferred format.'),
    407: ('Proxy Authentication Required', 'You must authenticate with '
          'this proxy before proceeding.'),
    408: ('Request Timeout', 'Request timed out; try again later.'),
    409: ('Conflict', 'Request conflict.'),
    410: ('Gone',
          'URI no longer exists and has been permanently removed.'),
    411: ('Length Required', 'Client must specify Content-Length.'),
    412: ('Precondition Failed', 'Precondition in headers is false.'),
    413: ('Request Entity Too Large', 'Entity is too large.'),
    414: ('Request-URI Too Long', 'URI is too long.'),
    415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
    416: ('Requested Range Not Satisfiable',
          'Cannot satisfy request range.'),
    417: ('Expectation Failed',
          'Expect condition could not be satisfied.'),

    # 5xx
    500: ('Internal Server Error', 'Server got itself in trouble'),
    501: ('Not Implemented',
          'Server does not support this operation'),
    502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
    503: ('Service Unavailable',
          'The server cannot process the request due to a high load'),
    504: ('Gateway Timeout',
          'The gateway server did not receive a timely response'),
    505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
    }
598
599
600class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
601
602 """Simple HTTP request handler with GET and HEAD commands.
603
604 This serves files from the current directory and any of its
605 subdirectories. The MIME type for files is determined by
606 calling the .guess_type() method.
607
608 The GET and HEAD requests are identical except that the HEAD
609 request omits the actual contents of the file.
610
611 """
612
# Software name for this handler, suffixed with the module's __version__.
server_version = "SimpleHTTP/" + __version__
614
def do_GET(self):
    """Serve a GET request.

    send_head() emits the headers and returns the file object to
    transmit (or None when there is nothing more to do).  The file is
    closed even if copying it to the client fails.

    """
    f = self.send_head()
    if f:
        try:
            self.copyfile(f, self.wfile)
        finally:
            # Don't leak the descriptor when the client goes away
            # in the middle of the transfer.
            f.close()
621
def do_HEAD(self):
    """Serve a HEAD request: send the headers, then discard the file."""
    body = self.send_head()
    if not body:
        return
    body.close()
627
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    """
    path = self.translate_path(self.path)
    if os.path.isdir(path):
        if not self.path.endswith('/'):
            # redirect browser - doing basically what apache does
            self.send_response(301)
            self.send_header("Location", self.path + "/")
            self.end_headers()
            return None
        for index in "index.html", "index.htm":
            index = os.path.join(path, index)
            if os.path.exists(index):
                path = index
                break
        else:
            # No index file: fall back to a generated listing.
            return self.list_directory(path)
    ctype = self.guess_type(path)
    try:
        f = open(path, 'rb')
    except IOError:
        self.send_error(404, "File not found")
        return None
    try:
        self.send_response(200)
        self.send_header("Content-type", ctype)
        fs = os.fstat(f.fileno())
        self.send_header("Content-Length", str(fs.st_size))
        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
        self.end_headers()
    except BaseException:
        # The caller never sees the file if sending the headers fails,
        # so it has to be closed here before the error propagates.
        f.close()
        raise
    return f
668
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    """
    try:
        names = os.listdir(path)
    except os.error:
        self.send_error(404, "No permission to list directory")
        return None
    names.sort(key=lambda a: a.lower())
    r = []
    # Escape everything that ends up as HTML text with the same helper
    # send_error() uses for its message.
    displaypath = _quote_html(urllib.parse.unquote(self.path))
    r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
    r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
    r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
    r.append("<hr>\n<ul>\n")
    for name in names:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        r.append('<li><a href="%s">%s</a>\n'
                % (urllib.parse.quote(linkname), _quote_html(displayname)))
    r.append("</ul>\n<hr>\n</body>\n</html>\n")
    enc = sys.getfilesystemencoding()
    encoded = ''.join(r).encode(enc)
    f = io.BytesIO()
    f.write(encoded)
    f.seek(0)
    self.send_response(200)
    self.send_header("Content-type", "text/html; charset=%s" % enc)
    self.send_header("Content-Length", str(len(encoded)))
    self.end_headers()
    return f
712
def translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    The result is rooted at the current working directory.
    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)

    """
    # abandon query parameters
    path = path.split('?', 1)[0].split('#', 1)[0]
    path = posixpath.normpath(urllib.parse.unquote(path))
    local = os.getcwd()
    for part in path.split('/'):
        if not part:
            continue
        # Keep only the last piece of each component, with any drive
        # prefix and local directory part stripped.
        part = os.path.splitdrive(part)[1]
        part = os.path.split(part)[1]
        if part in (os.curdir, os.pardir):
            continue
        local = os.path.join(local, part)
    return local
734
def copyfile(self, source, outputfile):
    """Copy all data from SOURCE to OUTPUTFILE.

    SOURCE is a file object open for reading (or anything with a
    read() method) and OUTPUTFILE is a file object open for writing
    (or anything with a write() method).

    The only reason for overriding this would be to change
    the block size or perhaps to replace newlines by CRLF
    -- note however that the default server uses this
    to copy binary data as well.

    """
    shutil.copyfileobj(fsrc=source, fdst=outputfile)
750
def guess_type(self, path):
    """Guess the type of a file.

    Argument is a PATH (a filename).

    Return value is a string of the form type/subtype,
    usable for a MIME Content-type header.

    The file's extension is looked up in self.extensions_map, first
    as given and then lower-cased; the entry stored under '' is the
    fallback.  It would be permissible (if slow) to look inside the
    data to make a better guess.

    """
    ext = posixpath.splitext(path)[1]
    table = self.extensions_map
    for candidate in (ext, ext.lower()):
        if candidate in table:
            return table[candidate]
    return table['']
774
if not mimetypes.inited:
    mimetypes.init() # try to read system mime.types
# Start from a private copy of the mimetypes table, then add the fallback
# entry used by guess_type() and serve a few source file types as text.
extensions_map = dict(mimetypes.types_map)
extensions_map[''] = 'application/octet-stream' # Default
extensions_map.update(dict.fromkeys(('.py', '.c', '.h'), 'text/plain'))
784
785
786# Utilities for CGIHTTPRequestHandler
787
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is cached in the module-level ``nobody``.  Returns -1
    (uncached) when the pwd module cannot be imported.  If there is no
    'nobody' account, one more than the highest uid in the password
    database is used instead.

    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
804
805
def executable(path):
    """Return True if PATH exists and has any execute permission bit set."""
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return bool(mode & 0o111)
813
814
815class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
816
817 """Complete HTTP server with GET, HEAD and POST commands.
818
819 GET and HEAD also support running CGI scripts.
820
821 The POST command is *only* implemented for CGI scripts.
822
823 """
824
# Determine platform specifics
# True when this platform's os module provides fork().
have_fork = hasattr(os, 'fork')

# Make rfile unbuffered -- we need to read one line and then pass
# the rest to a subprocess, so we can't use buffered input.
rbufsize = 0
831
def do_POST(self):
    """Serve a POST request.

    POST is only implemented for CGI scripts; any other target is
    answered with a 501 error.

    """
    if not self.is_cgi():
        self.send_error(501, "Can only POST to CGI scripts")
        return
    self.run_cgi()
843
def send_head(self):
    """Version of send_head that supports CGI scripts.

    CGI requests are handed to run_cgi(); everything else is served
    by SimpleHTTPRequestHandler.send_head().

    """
    if not self.is_cgi():
        return SimpleHTTPRequestHandler.send_head(self)
    return self.run_cgi()
850
def is_cgi(self):
    """Test whether self.path corresponds to a CGI script.

    Return True if self.path begins with one of the strings in
    self.cgi_directories and the next character is a '/' or the end
    of the string; return False otherwise.

    On a match, self.cgi_info is set to the tuple (dir, rest), where
    dir is the matching directory and rest is whatever follows the
    separating '/' (possibly empty).

    """
    path = self.path
    for prefix in self.cgi_directories:
        end = len(prefix)
        if path.startswith(prefix) and path[end:end + 1] in ('', '/'):
            self.cgi_info = path[:end], path[end + 1:]
            return True
    return False
873
# URL path prefixes that is_cgi() treats as CGI script directories.
cgi_directories = ['/cgi-bin', '/htbin']
875
def is_executable(self, path):
    """Return whether PATH is an executable file, as judged by executable()."""
    verdict = executable(path)
    return verdict
879
def is_python(self, path):
    """Return True if PATH ends in .py or .pyw (compared case-insensitively)."""
    extension = os.path.splitext(path)[1]
    return extension.lower() in (".py", ".pyw")
884
def run_cgi(self):
    """Execute a CGI script.

    The request path (self.path) is split, starting from the
    (directory, rest) pair stored in self.cgi_info, into a script
    directory, a script name, optional extra path info and an optional
    query string.

    Error responses sent instead of running anything:
      404 -- the translated script path does not exist
      403 -- it is not a plain file, or it is neither a Python script
             nor executable

    Otherwise the CGI environment variables are computed and merged
    into os.environ, a 200 response line is sent, and the script is
    run: via os.fork()/os.execve() when self.have_fork is true, or via
    subprocess.Popen() when it is not.  Returns None in every case.
    """
    path = self.path
    cgi_dir, rest = self.cgi_info

    # Extend the script directory as long as the next path component
    # translates to an existing directory on disk.
    i = path.find('/', len(cgi_dir) + 1)
    while i >= 0:
        nextdir = path[:i]
        nextrest = path[i+1:]

        scriptdir = self.translate_path(nextdir)
        if os.path.isdir(scriptdir):
            cgi_dir, rest = nextdir, nextrest
            i = path.find('/', len(cgi_dir) + 1)
        else:
            break

    # find an explicit query string, if present.
    i = rest.rfind('?')
    if i >= 0:
        rest, query = rest[:i], rest[i+1:]
    else:
        query = ''

    # dissect the part after the directory name into a script name &
    # a possible additional path, to be stored in PATH_INFO.
    i = rest.find('/')
    if i >= 0:
        script, rest = rest[:i], rest[i:]
    else:
        script, rest = rest, ''

    scriptname = cgi_dir + '/' + script
    scriptfile = self.translate_path(scriptname)
    if not os.path.exists(scriptfile):
        self.send_error(404, "No such CGI script (%r)" % scriptname)
        return
    if not os.path.isfile(scriptfile):
        self.send_error(403, "CGI script is not a plain file (%r)" %
                        scriptname)
        return
    # Python scripts are exempt from the executable check.
    if not self.is_python(scriptname) and \
       not self.is_executable(scriptfile):
        self.send_error(403, "CGI script is not executable (%r)" %
                        scriptname)
        return

    # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    # XXX Much of the following could be prepared ahead of time!
    env = {}
    env['SERVER_SOFTWARE'] = self.version_string()
    env['SERVER_NAME'] = self.server.server_name
    env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    env['SERVER_PROTOCOL'] = self.protocol_version
    env['SERVER_PORT'] = str(self.server.server_port)
    env['REQUEST_METHOD'] = self.command
    uqrest = urllib.parse.unquote(rest)
    env['PATH_INFO'] = uqrest
    env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    env['SCRIPT_NAME'] = scriptname
    if query:
        env['QUERY_STRING'] = query
    host = self.address_string()
    if host != self.client_address[0]:
        env['REMOTE_HOST'] = host
    env['REMOTE_ADDR'] = self.client_address[0]
    authorization = self.headers.get("authorization")
    if authorization:
        auth_parts = authorization.split()
        if len(auth_parts) == 2:
            import base64
            import binascii
            env['AUTH_TYPE'] = auth_parts[0]
            if auth_parts[0].lower() == "basic":
                try:
                    # decodebytes() replaces decodestring(), which no
                    # longer exists in current Python 3 releases.
                    credentials = base64.decodebytes(
                        auth_parts[1].encode('ascii')).decode('ascii')
                except (binascii.Error, UnicodeError):
                    # Undecodable credentials: leave REMOTE_USER unset.
                    pass
                else:
                    # Split on the first colon only: the password part
                    # may itself contain colons.
                    user, colon, _password = credentials.partition(':')
                    if colon:
                        env['REMOTE_USER'] = user
    # XXX REMOTE_IDENT
    if self.headers.get('content-type') is None:
        env['CONTENT_TYPE'] = self.headers.get_content_type()
    else:
        env['CONTENT_TYPE'] = self.headers['content-type']
    length = self.headers.get('content-length')
    if length:
        env['CONTENT_LENGTH'] = length
    referer = self.headers.get('referer')
    if referer:
        env['HTTP_REFERER'] = referer
    accept = []
    for line in self.headers.getallmatchingheaders('accept'):
        if line[:1] in "\t\n\r ":
            # Continuation line of a folded header.
            accept.append(line.strip())
        else:
            # Skip the leading "Accept:" (7 characters).
            accept = accept + line[7:].split(',')
    env['HTTP_ACCEPT'] = ','.join(accept)
    ua = self.headers.get('user-agent')
    if ua:
        env['HTTP_USER_AGENT'] = ua
    # Build a real list: a filter() object is always truthy, which
    # would make the emptiness test below meaningless.
    cookies = [c for c in self.headers.get_all('cookie', []) if c]
    if cookies:
        env['HTTP_COOKIE'] = ', '.join(cookies)
    # XXX Other HTTP_* headers
    # Since we're setting the env in the parent, provide empty
    # values to override previously set values
    for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
              'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
        env.setdefault(k, "")
    os.environ.update(env)

    self.send_response(200, "Script output follows")

    decoded_query = query.replace('+', ' ')

    if self.have_fork:
        # Unix -- fork as we should
        args = [script]
        if '=' not in decoded_query:
            args.append(decoded_query)
        nobody = nobody_uid()
        self.wfile.flush() # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile], [], [], 0)[0]:
                if not self.rfile.read(1):
                    break
            if sts:
                self.log_error("CGI script exit status %#x", sts)
            return
        # Child
        try:
            try:
                os.setuid(nobody)
            except os.error:
                # Best effort: keep the current uid if it cannot be
                # changed.
                pass
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile, args, os.environ)
        except:
            # Deliberately catch everything: the child must never fall
            # back into the caller's code, so report and exit hard.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)

    else:
        # Non-Unix -- use subprocess
        import subprocess
        # Build an argument list rather than a command string so that
        # paths containing spaces and arbitrary query text are passed
        # through intact instead of being re-split or hand-quoted.
        cmdline = [scriptfile]
        if self.is_python(scriptfile):
            interp = sys.executable
            if interp.lower().endswith("w.exe"):
                # On Windows, use python.exe, not pythonw.exe
                interp = interp[:-5] + interp[-4:]
            cmdline = [interp, '-u'] + cmdline
        if '=' not in query:
            cmdline.append(query)
        self.log_message("command: %s", subprocess.list2cmdline(cmdline))
        try:
            nbytes = int(length)
        except (TypeError, ValueError):
            # Missing or malformed Content-Length: send no request body.
            nbytes = 0
        p = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             )
        if self.command.lower() == "post" and nbytes > 0:
            data = self.rfile.read(nbytes)
        else:
            data = None
        # throw away additional data [see bug #427345]
        # NOTE(review): relies on self.rfile exposing a _sock attribute
        # -- confirm this holds for the rfile object in use.
        while select.select([self.rfile._sock], [], [], 0)[0]:
            if not self.rfile._sock.recv(1):
                break
        stdout, stderr = p.communicate(data)
        self.wfile.write(stdout)
        if stderr:
            self.log_error('%s', stderr)
        status = p.returncode
        if status:
            self.log_error("CGI script exit status %#x", status)
        else:
            self.log_message("CGI script exited OK")
1075
1076
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run a demonstration HTTP server built from the given classes.

    The server listens on the port given as the first command line
    argument, or on port 8000 when no argument is supplied, bound to
    the address ''.  HandlerClass.protocol_version is set to *protocol*
    before the server is created, and the bound address is printed
    before serving starts.

    """
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    httpd.serve_forever()
1098
1099
if __name__ == '__main__':
    # Run the demo server once per handler class, in this order.
    # NOTE(review): test() finishes with httpd.serve_forever(), so the
    # later handlers are only reached if that call returns -- confirm
    # that running all three in sequence is really intended.
    for handler_class in (BaseHTTPRequestHandler,
                          SimpleHTTPRequestHandler,
                          CGIHTTPRequestHandler):
        test(HandlerClass=handler_class)
1103 test(HandlerClass=CGIHTTPRequestHandler)