blob: 5ac6c0d204e3cf8429ec514b2d1328f0f757185a [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (which adds POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module; it is embedded in the Server header through the
# handlers' server_version attributes.
__version__ = "0.6"

# Public API of the module.  The two concrete request handlers are part of
# the public interface as well, so they are exported alongside the base
# classes.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl24420152008-05-26 16:32:26 +000087import cgi
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran5e8826c2010-10-03 18:04:52 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
# Default error message template.  It is filled in with a mapping that
# provides the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Content type announced for the error page built from the template above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"


def _quote_html(html):
    """Return *html* with '&', '<' and '>' replaced by HTML entities.

    Quote characters are left untouched.
    """
    escaped = html
    # '&' has to be handled first so the entities introduced for '<' and
    # '>' are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(raw, entity)
    return escaped
121
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port number."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then record server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        # Only the first two items matter; the address tuple may be longer.
        bound_address = self.socket.getsockname()[:2]
        host, port = bound_address
        self.server_name = socket.getfqdn(host)
        self.server_port = port
132
133
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP). The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF. Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>). Specifically,
    a request SPAM will be handled by a method do_SPAM(). If no
    such method exists the server sends an error response to the
    client. If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line. Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any). The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version. You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version. This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip exactly one line terminator (CRLF preferred, bare LF allowed).
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                                "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: treat as an HTTP/0.9 request (GET only).
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank request line: fail silently, nothing is sent back.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        # Read one byte more than the limit so an over-long line is detected.
        self.raw_requestline = self.rfile.readline(65537)
        if len(self.raw_requestline) > 65536:
            # These attributes are read while sending and logging the error.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return
        if not self.raw_requestline:
            # Nothing was read at all: stop serving this connection.
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests or for status codes that forbid one.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        The status line is encoded as ISO-8859-1 (latin-1).  The request
        line is decoded with that same charset, so any part of it echoed
        into the reason phrase (as send_error() callers do) can always be
        encoded again instead of raising UnicodeEncodeError.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s %d %s\r\n" %
                              (self.protocol_version, code, message)).encode(
                                  'latin-1', 'strict'))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A 'Connection' header
        additionally updates self.close_connection.  The header line is
        encoded as ISO-8859-1 (latin-1), like the status line.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode(
                'latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(b"\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled. By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions. Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged. If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If *timestamp* is given it is formatted instead of the current
        time.  The result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday number of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month number of a time tuple (1-12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the name it reports.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
593
594
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories. The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the copy fails (e.g. the client
            # disconnects half-way through the transfer).
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file: serve a generated listing instead.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        # From here on this method owns the open file; do not leak it
        # if stat-ing the file or writing the headers fails.
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error). In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        # html.escape(..., quote=False) escapes exactly '&', '<' and '>',
        # which is what the removed cgi.escape() default did.
        import html

        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib.parse.unquote(self.path),
                                  quote=False)
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            # File names that could not be decoded contain lone surrogates;
            # 'surrogatepass' lets quote() handle them instead of raising.
            r.append('<li><a href="%s">%s</a>\n'
                     % (urllib.parse.quote(linkname, errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        # 'surrogateescape' turns undecodable file names back into their
        # original bytes rather than failing the whole listing.
        encoded = ''.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored. (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        # The result is always rooted at the current working directory.
        path = os.getcwd()
        for word in words:
            # Keep only the final plain component of each word.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then its lower-case form.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
779
780
781# Utilities for CGIHTTPRequestHandler
782
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000783# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalize a URL path and split it into (head, tail).

    Empty and '.' path elements are dropped and '..' elements are
    collapsed against the element before them, akin to RFC-2396 5.2
    step 6 for relative paths.

    Returns: A tuple of (head, tail) where tail is everything after the
    final / and head is everything before it.  Head always starts with a
    '/' and, if it contains anything else, never has a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # Like os.path.split(os.path.normpath(path)), but following URL path
    # semantics instead of those of the local operating system.
    segments = ['' if segment == '.' else segment
                for segment in path.split('/')]
    # str.split() never returns an empty list, so a tail always exists.
    *leading, tail = segments
    collapsed = []
    for segment in leading:
        if not segment:
            # Blank elements come from repeated '/' or from '.'.
            continue
        if segment == '..':
            collapsed.pop()
        else:
            collapsed.append(segment)
    if tail == '..':
        # A trailing '..' removes one more directory and leaves no tail.
        collapsed.pop()
        tail = ''
    return ('/' + '/'.join(collapsed), tail)
821
822
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the
    uid of the 'nobody' account is returned; if there is no such
    account, one more than the largest uid in the password database is
    used.  The result is remembered in the module-level ``nobody``.
    """
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody').pw_uid
    except KeyError:
        highest_uid = max(entry.pw_uid for entry in pwd.getpwall())
        nobody = highest_uid + 1
    return nobody
839
840
def executable(path):
    """Return True if *path* has any execute permission bit set.

    A path that cannot be stat()ed is reported as not executable.
    """
    try:
        mode = os.stat(path).st_mode
    except OSError:
        return False
    # 0o111 is the execute bit for owner, group and others.
    return bool(mode & 0o111)
848
849
850class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
851
852 """Complete HTTP server with GET, HEAD and POST commands.
853
854 GET and HEAD also support running CGI scripts.
855
856 The POST command is *only* implemented for CGI scripts.
857
858 """
859
860 # Determine platform specifics
861 have_fork = hasattr(os, 'fork')
Georg Brandl24420152008-05-26 16:32:26 +0000862
863 # Make rfile unbuffered -- we need to read one line and then pass
864 # the rest to a subprocess, so we can't use buffered input.
865 rbufsize = 0
866
867 def do_POST(self):
868 """Serve a POST request.
869
870 This is only implemented for CGI scripts.
871
872 """
873
874 if self.is_cgi():
875 self.run_cgi()
876 else:
877 self.send_error(501, "Can only POST to CGI scripts")
878
879 def send_head(self):
880 """Version of send_head that support CGI scripts"""
881 if self.is_cgi():
882 return self.run_cgi()
883 else:
884 return SimpleHTTPRequestHandler.send_head(self)
885
886 def is_cgi(self):
887 """Test whether self.path corresponds to a CGI script.
888
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000889 Returns True and updates the cgi_info attribute to the tuple
890 (dir, rest) if self.path requires running a CGI script.
891 Returns False otherwise.
Georg Brandl24420152008-05-26 16:32:26 +0000892
Benjamin Petersona7deeee2009-05-08 20:54:42 +0000893 If any exception is raised, the caller should assume that
894 self.path was rejected as invalid and act accordingly.
895
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000896 The default implementation tests whether the normalized url
897 path begins with one of the strings in self.cgi_directories
898 (and the next character is a '/' or the end of the string).
Georg Brandl24420152008-05-26 16:32:26 +0000899
900 """
901
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000902 splitpath = _url_collapse_path_split(self.path)
903 if splitpath[0] in self.cgi_directories:
904 self.cgi_info = splitpath
905 return True
Georg Brandl24420152008-05-26 16:32:26 +0000906 return False
907
908 cgi_directories = ['/cgi-bin', '/htbin']
909
    def is_executable(self, path):
        """Test whether argument path is an executable file.

        Thin wrapper around the module-level executable() function.
        """
        return executable(path)
913
def is_python(self, path):
    """Report whether *path* looks like a Python script.

    The decision is made purely on the file extension, compared
    case-insensitively against ".py" and ".pyw".
    """
    extension = os.path.splitext(path)[1].lower()
    return extension == ".py" or extension == ".pyw"
918
def run_cgi(self):
    """Execute a CGI script.

    Uses the (dir, rest) tuple that is_cgi() stored in self.cgi_info to
    locate the script, answers 404/403 via send_error() when the script
    is missing, is not a plain file or is not executable, and otherwise
    builds the CGI environment from the request, sends the 200 status
    line and runs the script.

    When os.fork() is available the script is exec'ed in a forked child
    whose stdin/stdout are the request's rfile/wfile; otherwise it is
    run through subprocess.Popen() and its output is copied to wfile.

    Always returns None.
    """
    cgidir, rest = self.cgi_info
    # Walk the normalized path that is_cgi() produced rather than the raw
    # self.path: the offsets below are derived from cgidir, which only
    # lines up with the string it was split from.
    path = cgidir + '/' + rest

    # Extend cgidir over any leading components that are real directories.
    i = path.find('/', len(cgidir) + 1)
    while i >= 0:
        nextdir = path[:i]
        nextrest = path[i+1:]

        scriptdir = self.translate_path(nextdir)
        if os.path.isdir(scriptdir):
            cgidir, rest = nextdir, nextrest
            i = path.find('/', len(cgidir) + 1)
        else:
            break

    # Find an explicit query string, if present.  The query component
    # starts at the FIRST '?'; any later '?' belongs to the query itself.
    rest, _, query = rest.partition('?')

    # Dissect the part after the directory name into a script name &
    # a possible additional path, to be stored in PATH_INFO.
    i = rest.find('/')
    if i >= 0:
        script, rest = rest[:i], rest[i:]
    else:
        script, rest = rest, ''

    scriptname = cgidir + '/' + script
    scriptfile = self.translate_path(scriptname)
    if not os.path.exists(scriptfile):
        self.send_error(404, "No such CGI script (%r)" % scriptname)
        return
    if not os.path.isfile(scriptfile):
        self.send_error(403, "CGI script is not a plain file (%r)" %
                        scriptname)
        return
    ispy = self.is_python(scriptname)
    if not ispy:
        if not self.is_executable(scriptfile):
            self.send_error(403, "CGI script is not executable (%r)" %
                            scriptname)
            return

    # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    # XXX Much of the following could be prepared ahead of time!
    env = copy.deepcopy(os.environ)
    env['SERVER_SOFTWARE'] = self.version_string()
    env['SERVER_NAME'] = self.server.server_name
    env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    env['SERVER_PROTOCOL'] = self.protocol_version
    env['SERVER_PORT'] = str(self.server.server_port)
    env['REQUEST_METHOD'] = self.command
    uqrest = urllib.parse.unquote(rest)
    env['PATH_INFO'] = uqrest
    env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    env['SCRIPT_NAME'] = scriptname
    if query:
        env['QUERY_STRING'] = query
    host = self.address_string()
    if host != self.client_address[0]:
        env['REMOTE_HOST'] = host
    env['REMOTE_ADDR'] = self.client_address[0]
    authorization = self.headers.get("authorization")
    if authorization:
        authorization = authorization.split()
        if len(authorization) == 2:
            import base64
            import binascii
            env['AUTH_TYPE'] = authorization[0]
            if authorization[0].lower() == "basic":
                try:
                    credentials = authorization[1].encode('ascii')
                    credentials = base64.decodebytes(credentials).\
                                  decode('ascii')
                except (binascii.Error, UnicodeError):
                    # Undecodable credentials: leave REMOTE_USER unset.
                    pass
                else:
                    # Only the first ':' separates user-id from password;
                    # the password itself may contain further colons.
                    user, colon, _ = credentials.partition(':')
                    if colon:
                        env['REMOTE_USER'] = user
    # XXX REMOTE_IDENT
    if self.headers.get('content-type') is None:
        env['CONTENT_TYPE'] = self.headers.get_content_type()
    else:
        env['CONTENT_TYPE'] = self.headers['content-type']
    length = self.headers.get('content-length')
    if length:
        env['CONTENT_LENGTH'] = length
    referer = self.headers.get('referer')
    if referer:
        env['HTTP_REFERER'] = referer
    accept = []
    for line in self.headers.getallmatchingheaders('accept'):
        if line[:1] in "\t\n\r ":
            # Continuation line of a folded header.
            accept.append(line.strip())
        else:
            # Skip the leading "Accept:" (7 characters).
            accept.extend(line[7:].split(','))
    env['HTTP_ACCEPT'] = ','.join(accept)
    ua = self.headers.get('user-agent')
    if ua:
        env['HTTP_USER_AGENT'] = ua
    co = filter(None, self.headers.get_all('cookie', []))
    cookie_str = ', '.join(co)
    if cookie_str:
        env['HTTP_COOKIE'] = cookie_str
    # XXX Other HTTP_* headers
    # Since we're setting the env in the parent, provide empty
    # values to override previously set values
    for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
              'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
        env.setdefault(k, "")

    # The status line goes out before the script runs, so the script
    # cannot choose a different status code.
    self.send_response(200, "Script output follows")

    decoded_query = query.replace('+', ' ')

    if self.have_fork:
        # Unix -- fork as we should
        args = [script]
        if '=' not in decoded_query:
            args.append(decoded_query)
        nobody = nobody_uid()
        self.wfile.flush() # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile], [], [], 0)[0]:
                if not self.rfile.read(1):
                    break
            if sts:
                self.log_error("CGI script exit status %#x", sts)
            return
        # Child
        try:
            try:
                os.setuid(nobody)
            except os.error:
                # Best effort: keep the current uid if it cannot be dropped.
                pass
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile, args, env)
        except:
            # Deliberately catch everything: the child must never return
            # into the server loop, so report the failure and exit.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)

    else:
        # Non-Unix -- use subprocess
        import subprocess
        cmdline = [scriptfile]
        if self.is_python(scriptfile):
            interp = sys.executable
            if interp.lower().endswith("w.exe"):
                # On Windows, use python.exe, not pythonw.exe
                interp = interp[:-5] + interp[-4:]
            cmdline = [interp, '-u'] + cmdline
        # NOTE(review): this branch passes the raw query while the fork
        # branch passes decoded_query -- confirm whether that is intended.
        if '=' not in query:
            cmdline.append(query)
        self.log_message("command: %s", subprocess.list2cmdline(cmdline))
        try:
            nbytes = int(length)
        except (TypeError, ValueError):
            nbytes = 0
        p = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             env=env
                             )
        if self.command.lower() == "post" and nbytes > 0:
            data = self.rfile.read(nbytes)
        else:
            data = None
        # throw away additional data [see bug #427345]
        while select.select([self.rfile._sock], [], [], 0)[0]:
            if not self.rfile._sock.recv(1):
                break
        stdout, stderr = p.communicate(data)
        self.wfile.write(stdout)
        if stderr:
            self.log_error('%s', stderr)
        p.stderr.close()
        p.stdout.close()
        status = p.returncode
        if status:
            self.log_error("CGI script exit status %#x", status)
        else:
            self.log_message("CGI script exited OK")
1112
1113
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run a demonstration server built from the given classes.

    The server listens on all interfaces on port 8000, or on the port
    given as the first command line argument, and serves until it is
    interrupted from the keyboard, at which point the process exits.

    """
    cli_args = sys.argv[1:]
    port = int(sys.argv[1]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001140
# Script entry point: serve files with the simple (non-CGI) request handler.
if __name__ == "__main__":
    test(HandlerClass=SimpleHTTPRequestHandler)