blob: 098ad250caca9d8d7e6a8d184ea2ad3656f2d414 [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (the only place POST is handled).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
__version__ = "0.6"

# Every public class defined in this module is exported, so that
# "from <module> import *" also provides the two concrete handlers.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl24420152008-05-26 16:32:26 +000087import cgi
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Georg Brandl24420152008-05-26 16:32:26 +0000102
103# Default error message template
104DEFAULT_ERROR_MESSAGE = """\
105<head>
106<title>Error response</title>
107</head>
108<body>
109<h1>Error response</h1>
110<p>Error code %(code)d.
111<p>Message: %(message)s.
112<p>Error code explanation: %(code)s = %(explain)s.
113</body>
114"""
115
116DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
117
def _quote_html(html):
    """Return html with the characters &, < and > replaced by entities."""
    # '&' must be handled first; otherwise the ampersands introduced by
    # the other two substitutions would themselves be escaped again.
    escaped = html
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(char, entity)
    return escaped
120
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
131
132
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    # Longest request line (in bytes, line terminator included) that
    # handle_one_request() accepts; anything longer is answered with 414
    # instead of being buffered without bound.
    _MAX_REQUEST_LINE = 65536

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for an entirely blank request line,
        which is dropped silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip exactly one line terminator: CRLF, or a bare LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: treat as an HTTP/0.9 request.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        A request line longer than _MAX_REQUEST_LINE bytes is rejected
        with a 414 response.  A socket timeout while reading or writing
        is logged and marks the connection for closing.

        """
        try:
            # Read one byte more than the limit so that an over-long line
            # can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(
                self._MAX_REQUEST_LINE + 1)
            if len(self.raw_requestline) > self._MAX_REQUEST_LINE:
                # parse_request() has not run, so provide the attributes
                # that send_error() and the logging helpers read.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML body
        is omitted for HEAD requests and for codes below 200, 204 and 304.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message),
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        # This header also sets self.close_connection (see send_header).
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message for code from
        self.responses is used (or an empty string for unknown codes).

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        # HTTP/0.9 replies consist of the data only: no status line.
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s %d %s\r\n" %
                              (self.protocol_version, code, message)
                              ).encode('ASCII', 'strict'))
            # print (self.protocol_version, code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A Connection header
        with the value "close" or "keep-alive" additionally updates
        self.close_connection.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(
                ("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(b"\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp is given, it is formatted instead of the current
        time.  The result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current (local) time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1..12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns whatever name that call yields.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
594
595
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the client goes away mid-transfer
            # and copyfile() raises.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file found: fall back to a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file object on this path, so
            # it has to be closed here before the error propagates.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            names = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        names.sort(key=lambda a: a.lower())
        r = []
        # _quote_html escapes &, < and >, which is exactly what the
        # formerly used cgi.escape() did by default.
        displaypath = _quote_html(urllib.parse.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n"
                 % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in names:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
                     % (urllib.parse.quote(linkname),
                        _quote_html(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always located below the current working directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Keep only the final plain component of each word so that a
            # drive letter or an embedded directory cannot escape the root.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension exactly as written first, then lower-cased.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
780
781
782# Utilities for CGIHTTPRequestHandler
783
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000784# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalize a URL path and split it into its directory and final element.

    Empty and '.' elements are dropped and every '..' removes the element
    before it, similar to RFC-2396 5.2 step 6 for relative paths.

    Returns: A tuple of (head, tail) where tail is everything after the final /
    and head is everything before it.  Head will always start with a '/' and,
    if it contains anything else, never have a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # URL semantics are wanted here, not those of the local operating
    # system, hence no os.path.split(os.path.normpath(path)).
    segments = ['' if seg == '.' else seg for seg in path.split('/')]
    # str.split() always yields at least one element, so there is always
    # a final element to peel off.
    *leading, tail_part = segments
    head_parts = []
    for seg in leading:
        if not seg:
            # blank (or '.') element in a non-trailing position
            continue
        if seg == '..':
            head_parts.pop()
        else:
            head_parts.append(seg)
    if tail_part == '..':
        head_parts.pop()
        tail_part = ''
    return ('/' + '/'.join(head_parts), tail_part)
822
823
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the uid
    of the 'nobody' account is returned, or, if no such account exists,
    one more than the highest uid in the password database.  The value is
    remembered in the module-level name `nobody`.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
840
841
def executable(path):
    """Test for executable file.

    Returns True when any of the execute permission bits (user, group or
    other) is set on path, and False when none is set or path cannot be
    stat'ed.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return bool(mode & 0o111)
849
850
851class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
852
853 """Complete HTTP server with GET, HEAD and POST commands.
854
855 GET and HEAD also support running CGI scripts.
856
857 The POST command is *only* implemented for CGI scripts.
858
859 """
860
861 # Determine platform specifics
862 have_fork = hasattr(os, 'fork')
Georg Brandl24420152008-05-26 16:32:26 +0000863
864 # Make rfile unbuffered -- we need to read one line and then pass
865 # the rest to a subprocess, so we can't use buffered input.
866 rbufsize = 0
867
868 def do_POST(self):
869 """Serve a POST request.
870
871 This is only implemented for CGI scripts.
872
873 """
874
875 if self.is_cgi():
876 self.run_cgi()
877 else:
878 self.send_error(501, "Can only POST to CGI scripts")
879
880 def send_head(self):
881 """Version of send_head that support CGI scripts"""
882 if self.is_cgi():
883 return self.run_cgi()
884 else:
885 return SimpleHTTPRequestHandler.send_head(self)
886
887 def is_cgi(self):
888 """Test whether self.path corresponds to a CGI script.
889
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000890 Returns True and updates the cgi_info attribute to the tuple
891 (dir, rest) if self.path requires running a CGI script.
892 Returns False otherwise.
Georg Brandl24420152008-05-26 16:32:26 +0000893
Benjamin Petersona7deeee2009-05-08 20:54:42 +0000894 If any exception is raised, the caller should assume that
895 self.path was rejected as invalid and act accordingly.
896
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000897 The default implementation tests whether the normalized url
898 path begins with one of the strings in self.cgi_directories
899 (and the next character is a '/' or the end of the string).
Georg Brandl24420152008-05-26 16:32:26 +0000900
901 """
902
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000903 splitpath = _url_collapse_path_split(self.path)
904 if splitpath[0] in self.cgi_directories:
905 self.cgi_info = splitpath
906 return True
Georg Brandl24420152008-05-26 16:32:26 +0000907 return False
908
909 cgi_directories = ['/cgi-bin', '/htbin']
910
def is_executable(self, path):
    """Return the result of the module-level executable() check for *path*."""
    verdict = executable(path)
    return verdict
914
def is_python(self, path):
    """Report whether *path* names a Python script, judged by its extension."""
    extension = os.path.splitext(path)[1]
    return extension.lower() in (".py", ".pyw")
919
def run_cgi(self):
    """Execute a CGI script.

    Must be called after is_cgi() has stored the normalized (dir, rest)
    pair in self.cgi_info.  The method locates the script, answers with
    404/403 through send_error() when it is missing, not a plain file or
    not executable, builds the CGI environment from the request, sends
    the 200 status line and finally runs the script: through
    fork()/execve() where os.fork exists, through subprocess.Popen()
    otherwise.

    The CGI variables are handed to the child in an explicit environment
    mapping; os.environ of the server process is not modified, so values
    from one request cannot leak into later requests.

    Returns None in every case (in the forked child it never returns).

    """
    cgi_dir, rest = self.cgi_info
    # Work on the normalized path that is_cgi() accepted, not on the raw
    # self.path, so the directory walk below and SCRIPT_NAME agree with
    # what was actually validated.
    path = cgi_dir + '/' + rest

    # Extend the script directory over every leading component of the
    # remainder that is a real directory on disk.
    i = path.find('/', len(cgi_dir) + 1)
    while i >= 0:
        nextdir = path[:i]
        nextrest = path[i+1:]

        scriptdir = self.translate_path(nextdir)
        if os.path.isdir(scriptdir):
            cgi_dir, rest = nextdir, nextrest
            i = path.find('/', len(cgi_dir) + 1)
        else:
            break

    # find an explicit query string, if present.  The query starts at
    # the FIRST '?'; any later '?' is part of the query itself.
    rest, _, query = rest.partition('?')

    # dissect the part after the directory name into a script name &
    # a possible additional path, to be stored in PATH_INFO.
    i = rest.find('/')
    if i >= 0:
        script, rest = rest[:i], rest[i:]
    else:
        script, rest = rest, ''

    scriptname = cgi_dir + '/' + script
    scriptfile = self.translate_path(scriptname)
    if not os.path.exists(scriptfile):
        self.send_error(404, "No such CGI script (%r)" % scriptname)
        return
    if not os.path.isfile(scriptfile):
        self.send_error(403, "CGI script is not a plain file (%r)" %
                        scriptname)
        return
    ispy = self.is_python(scriptname)
    # With fork() the script is exec'ed directly, so even Python scripts
    # need to be executable; without fork, Python scripts are started
    # through the interpreter and only other scripts need the check.
    if self.have_fork or not ispy:
        if not self.is_executable(scriptfile):
            self.send_error(403, "CGI script is not executable (%r)" %
                            scriptname)
            return

    # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    # XXX Much of the following could be prepared ahead of time!
    env = {}
    env['SERVER_SOFTWARE'] = self.version_string()
    env['SERVER_NAME'] = self.server.server_name
    env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    env['SERVER_PROTOCOL'] = self.protocol_version
    env['SERVER_PORT'] = str(self.server.server_port)
    env['REQUEST_METHOD'] = self.command
    uqrest = urllib.parse.unquote(rest)
    env['PATH_INFO'] = uqrest
    env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    env['SCRIPT_NAME'] = scriptname
    if query:
        env['QUERY_STRING'] = query
    host = self.address_string()
    if host != self.client_address[0]:
        env['REMOTE_HOST'] = host
    env['REMOTE_ADDR'] = self.client_address[0]
    authorization = self.headers.get("authorization")
    if authorization:
        authorization = authorization.split()
        if len(authorization) == 2:
            import base64
            import binascii
            env['AUTH_TYPE'] = authorization[0]
            if authorization[0].lower() == "basic":
                try:
                    authorization = authorization[1].encode('ascii')
                    authorization = base64.decodebytes(authorization).\
                                    decode('ascii')
                except (binascii.Error, UnicodeError):
                    # Undecodable credentials: leave REMOTE_USER unset.
                    pass
                else:
                    authorization = authorization.split(':')
                    if len(authorization) == 2:
                        env['REMOTE_USER'] = authorization[0]
    # XXX REMOTE_IDENT
    if self.headers.get('content-type') is None:
        env['CONTENT_TYPE'] = self.headers.get_content_type()
    else:
        env['CONTENT_TYPE'] = self.headers['content-type']
    length = self.headers.get('content-length')
    if length:
        env['CONTENT_LENGTH'] = length
    referer = self.headers.get('referer')
    if referer:
        env['HTTP_REFERER'] = referer
    # Collect every Accept header value the same way cookies are
    # collected below, instead of re-parsing raw header lines.
    accept = self.headers.get_all('accept', ())
    env['HTTP_ACCEPT'] = ','.join(accept)
    ua = self.headers.get('user-agent')
    if ua:
        env['HTTP_USER_AGENT'] = ua
    co = filter(None, self.headers.get_all('cookie', []))
    cookie_str = ', '.join(co)
    if cookie_str:
        env['HTTP_COOKIE'] = cookie_str
    # XXX Other HTTP_* headers
    # Provide empty values for the optional variables so that they
    # override anything inherited from the server's own environment.
    for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
              'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
        env.setdefault(k, "")
    # The child sees the server's environment plus the CGI variables;
    # the server's os.environ itself stays untouched.
    child_env = dict(os.environ)
    child_env.update(env)

    self.send_response(200, "Script output follows")

    decoded_query = query.replace('+', ' ')

    if self.have_fork:
        # Unix -- fork as we should
        args = [script]
        if '=' not in decoded_query:
            args.append(decoded_query)
        nobody = nobody_uid()
        self.wfile.flush() # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile], [], [], 0)[0]:
                if not self.rfile.read(1):
                    break
            if sts:
                self.log_error("CGI script exit status %#x", sts)
            return
        # Child
        try:
            try:
                os.setuid(nobody)
            except os.error:
                # Best effort: keep the current uid if it cannot be dropped.
                pass
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile, args, child_env)
        except:
            # Deliberately catch everything: the child must never fall
            # back into the server's request loop.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)

    else:
        # Non-Unix -- use subprocess
        import subprocess
        cmdline = [scriptfile]
        if self.is_python(scriptfile):
            interp = sys.executable
            if interp.lower().endswith("w.exe"):
                # On Windows, use python.exe, not pythonw.exe
                interp = interp[:-5] + interp[-4:]
            cmdline = [interp, '-u'] + cmdline
        if '=' not in query:
            cmdline.append(query)
        self.log_message("command: %s", subprocess.list2cmdline(cmdline))
        try:
            nbytes = int(length)
        except (TypeError, ValueError):
            nbytes = 0
        p = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             env=child_env
                             )
        if self.command.lower() == "post" and nbytes > 0:
            data = self.rfile.read(nbytes)
        else:
            data = None
        # throw away additional data [see bug #427345]
        while select.select([self.rfile._sock], [], [], 0)[0]:
            if not self.rfile._sock.recv(1):
                break
        stdout, stderr = p.communicate(data)
        self.wfile.write(stdout)
        if stderr:
            self.log_error('%s', stderr)
        status = p.returncode
        if status:
            self.log_error("CGI script exit status %#x", status)
        else:
            self.log_message("CGI script exited OK")
1111
1112
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run a demo HTTP server built from the given classes.

    The server is bound to ('', port), where port is the first command
    line argument or 8000 when none is given, and serves requests until
    a keyboard interrupt is received.

    """
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001139
# When executed as a script, run the demo server with the simple
# file-serving handler.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)