blob: 78fbcda6191cd7863d4ab79e816a0359b43e9179 [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET, HEAD and POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
__version__ = "0.6"

# Public API of this module.  The two concrete request handlers defined
# further down are meant to be used and subclassed directly, so they are
# exported alongside the server and the base handler.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl1f7fffb2010-10-15 15:57:45 +000087import html
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran42713722010-10-03 17:55:45 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
# Default error message template.  It is %-formatted with a mapping that
# provides the keys ``code``, ``message`` and ``explain``.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 "http://www.w3.org/TR/html4/strict.dtd">
<html>
 <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
 <title>Error response</title>
 </head>
 <body>
 <h1>Error response</h1>
 <p>Error code: %(code)d</p>
 <p>Message: %(message)s.</p>
 <p>Error code explanation: %(code)s - %(explain)s.</p>
 </body>
</html>
"""

# Value of the Content-Type header that accompanies the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
123
124def _quote_html(html):
125 return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
126
class HTTPServer(socketserver.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        # getsockname() may return more than two fields (e.g. for IPv6);
        # only the first two are host and port.
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
137
138
139class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
140
141 """HTTP request handler base class.
142
143 The following explanation of HTTP serves to guide you through the
144 code as well as to expose any misunderstandings I may have about
145 HTTP (so you don't need to read the code to figure out I'm wrong
146 :-).
147
148 HTTP (HyperText Transfer Protocol) is an extensible protocol on
149 top of a reliable stream transport (e.g. TCP/IP). The protocol
150 recognizes three parts to a request:
151
152 1. One line identifying the request type and path
153 2. An optional set of RFC-822-style headers
154 3. An optional data part
155
156 The headers and data are separated by a blank line.
157
158 The first line of the request has the form
159
160 <command> <path> <version>
161
162 where <command> is a (case-sensitive) keyword such as GET or POST,
163 <path> is a string containing path information for the request,
164 and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
165 <path> is encoded using the URL encoding scheme (using %xx to signify
166 the ASCII character with hex code xx).
167
168 The specification specifies that lines are separated by CRLF but
169 for compatibility with the widest range of clients recommends
170 servers also handle LF. Similarly, whitespace in the request line
171 is treated sensibly (allowing multiple spaces between components
172 and allowing trailing whitespace).
173
174 Similarly, for output, lines ought to be separated by CRLF pairs
175 but most clients grok LF characters just fine.
176
177 If the first line of the request has the form
178
179 <command> <path>
180
181 (i.e. <version> is left out) then this is assumed to be an HTTP
182 0.9 request; this form has no optional headers and data part and
183 the reply consists of just the data.
184
185 The reply form of the HTTP 1.x protocol again has three parts:
186
187 1. One line giving the response code
188 2. An optional set of RFC-822-style headers
189 3. The data
190
191 Again, the headers and data are separated by a blank line.
192
193 The response code line has the form
194
195 <version> <responsecode> <responsestring>
196
197 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
198 <responsecode> is a 3-digit response code indicating success or
199 failure of the request, and <responsestring> is an optional
200 human-readable string explaining what the response code means.
201
202 This server parses the request and the headers, and then calls a
203 function specific to the request type (<command>). Specifically,
204 a request SPAM will be handled by a method do_SPAM(). If no
205 such method exists the server sends an error response to the
206 client. If it exists, it is called with no arguments:
207
208 do_SPAM()
209
210 Note that the request name is case sensitive (i.e. SPAM and spam
211 are different requests).
212
213 The various request details are stored in instance variables:
214
215 - client_address is the client IP address in the form (host,
216 port);
217
218 - command, path and version are the broken-down request line;
219
Barry Warsaw820c1202008-06-12 04:06:45 +0000220 - headers is an instance of email.message.Message (or a derived
Georg Brandl24420152008-05-26 16:32:26 +0000221 class) containing the header information;
222
223 - rfile is a file object open for reading positioned at the
224 start of the optional input data part;
225
226 - wfile is a file object open for writing.
227
228 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
229
230 The first thing to be written must be the response line. Then
231 follow 0 or more header lines, then a blank line, and then the
232 actual data (if any). The meaning of the header lines depends on
233 the command executed by the server; in most cases, when data is
234 returned, there should be at least one header line of the form
235
236 Content-type: <type>/<subtype>
237
238 where <type> and <subtype> should be registered MIME types,
239 e.g. "text/html" or "text/plain".
240
241 """
242
    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the HTML body.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
259
260 def parse_request(self):
261 """Parse a request (internal).
262
263 The request should be stored in self.raw_requestline; the results
264 are in self.command, self.path, self.request_version and
265 self.headers.
266
267 Return True for success, False for failure; on failure, an
268 error is sent back.
269
270 """
271 self.command = None # set in case of error on the first line
272 self.request_version = version = self.default_request_version
273 self.close_connection = 1
274 requestline = str(self.raw_requestline, 'iso-8859-1')
Senthil Kumaran30755492011-12-23 17:03:41 +0800275 requestline = requestline.rstrip('\r\n')
Georg Brandl24420152008-05-26 16:32:26 +0000276 self.requestline = requestline
277 words = requestline.split()
278 if len(words) == 3:
Senthil Kumaran30755492011-12-23 17:03:41 +0800279 command, path, version = words
Georg Brandl24420152008-05-26 16:32:26 +0000280 if version[:5] != 'HTTP/':
281 self.send_error(400, "Bad request version (%r)" % version)
282 return False
283 try:
284 base_version_number = version.split('/', 1)[1]
285 version_number = base_version_number.split(".")
286 # RFC 2145 section 3.1 says there can be only one "." and
287 # - major and minor numbers MUST be treated as
288 # separate integers;
289 # - HTTP/2.4 is a lower version than HTTP/2.13, which in
290 # turn is lower than HTTP/12.3;
291 # - Leading zeros MUST be ignored by recipients.
292 if len(version_number) != 2:
293 raise ValueError
294 version_number = int(version_number[0]), int(version_number[1])
295 except (ValueError, IndexError):
296 self.send_error(400, "Bad request version (%r)" % version)
297 return False
298 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
299 self.close_connection = 0
300 if version_number >= (2, 0):
301 self.send_error(505,
302 "Invalid HTTP Version (%s)" % base_version_number)
303 return False
304 elif len(words) == 2:
Senthil Kumaran30755492011-12-23 17:03:41 +0800305 command, path = words
Georg Brandl24420152008-05-26 16:32:26 +0000306 self.close_connection = 1
307 if command != 'GET':
308 self.send_error(400,
309 "Bad HTTP/0.9 request type (%r)" % command)
310 return False
311 elif not words:
312 return False
313 else:
314 self.send_error(400, "Bad request syntax (%r)" % requestline)
315 return False
316 self.command, self.path, self.request_version = command, path, version
317
318 # Examine the headers and look for a Connection directive.
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000319 try:
320 self.headers = http.client.parse_headers(self.rfile,
321 _class=self.MessageClass)
322 except http.client.LineTooLong:
323 self.send_error(400, "Line too long")
324 return False
Georg Brandl24420152008-05-26 16:32:26 +0000325
326 conntype = self.headers.get('Connection', "")
327 if conntype.lower() == 'close':
328 self.close_connection = 1
329 elif (conntype.lower() == 'keep-alive' and
330 self.protocol_version >= "HTTP/1.1"):
331 self.close_connection = 0
Senthil Kumaran0f476d42010-09-30 06:09:18 +0000332 # Examine the headers and look for an Expect directive
333 expect = self.headers.get('Expect', "")
334 if (expect.lower() == "100-continue" and
335 self.protocol_version >= "HTTP/1.1" and
336 self.request_version >= "HTTP/1.1"):
337 if not self.handle_expect_100():
338 return False
339 return True
340
341 def handle_expect_100(self):
342 """Decide what to do with an "Expect: 100-continue" header.
343
344 If the client is expecting a 100 Continue response, we must
345 respond with either a 100 Continue or a final response before
346 waiting for the request body. The default is to always respond
347 with a 100 Continue. You can behave differently (for example,
348 reject unauthorized requests) by overriding this method.
349
350 This method should either return True (possibly after sending
351 a 100 Continue response) or send an error response and return
352 False.
353
354 """
355 self.send_response_only(100)
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800356 self.flush_headers()
Georg Brandl24420152008-05-26 16:32:26 +0000357 return True
358
359 def handle_one_request(self):
360 """Handle a single HTTP request.
361
362 You normally don't need to override this method; see the class
363 __doc__ string for information on how to handle specific HTTP
364 commands such as GET and POST.
365
366 """
Kristján Valur Jónsson985fc6a2009-07-01 10:01:31 +0000367 try:
Antoine Pitrouc4924372010-12-16 16:48:36 +0000368 self.raw_requestline = self.rfile.readline(65537)
369 if len(self.raw_requestline) > 65536:
370 self.requestline = ''
371 self.request_version = ''
372 self.command = ''
373 self.send_error(414)
374 return
Kristján Valur Jónsson985fc6a2009-07-01 10:01:31 +0000375 if not self.raw_requestline:
376 self.close_connection = 1
377 return
378 if not self.parse_request():
379 # An error code has been sent, just exit
380 return
381 mname = 'do_' + self.command
382 if not hasattr(self, mname):
383 self.send_error(501, "Unsupported method (%r)" % self.command)
384 return
385 method = getattr(self, mname)
386 method()
387 self.wfile.flush() #actually send the response if not already done.
388 except socket.timeout as e:
389 #a read or a write timed out. Discard this connection
390 self.log_error("Request timed out: %r", e)
Georg Brandl24420152008-05-26 16:32:26 +0000391 self.close_connection = 1
392 return
Georg Brandl24420152008-05-26 16:32:26 +0000393
394 def handle(self):
395 """Handle multiple requests if necessary."""
396 self.close_connection = 1
397
398 self.handle_one_request()
399 while not self.close_connection:
400 self.handle_one_request()
401
402 def send_error(self, code, message=None):
403 """Send and log an error reply.
404
405 Arguments are the error code, and a detailed message.
406 The detailed message defaults to the short entry matching the
407 response code.
408
409 This sends an error response (so it must be called before any
410 output has been generated), logs the error, and finally sends
411 a piece of HTML explaining the error to the user.
412
413 """
414
415 try:
416 shortmsg, longmsg = self.responses[code]
417 except KeyError:
418 shortmsg, longmsg = '???', '???'
419 if message is None:
420 message = shortmsg
421 explain = longmsg
422 self.log_error("code %d, message %s", code, message)
423 # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
424 content = (self.error_message_format %
425 {'code': code, 'message': _quote_html(message), 'explain': explain})
426 self.send_response(code, message)
427 self.send_header("Content-Type", self.error_content_type)
428 self.send_header('Connection', 'close')
429 self.end_headers()
430 if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
431 self.wfile.write(content.encode('UTF-8', 'replace'))
432
433 def send_response(self, code, message=None):
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800434 """Add the response header to the headers buffer and log the
435 response code.
Georg Brandl24420152008-05-26 16:32:26 +0000436
437 Also send two standard headers with the server software
438 version and the current date.
439
440 """
441 self.log_request(code)
Senthil Kumaran0f476d42010-09-30 06:09:18 +0000442 self.send_response_only(code, message)
443 self.send_header('Server', self.version_string())
444 self.send_header('Date', self.date_time_string())
445
446 def send_response_only(self, code, message=None):
447 """Send the response header only."""
Georg Brandl24420152008-05-26 16:32:26 +0000448 if message is None:
449 if code in self.responses:
450 message = self.responses[code][0]
451 else:
452 message = ''
453 if self.request_version != 'HTTP/0.9':
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800454 if not hasattr(self, '_headers_buffer'):
455 self._headers_buffer = []
456 self._headers_buffer.append(("%s %d %s\r\n" %
457 (self.protocol_version, code, message)).encode(
458 'latin-1', 'strict'))
Georg Brandl24420152008-05-26 16:32:26 +0000459
460 def send_header(self, keyword, value):
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800461 """Send a MIME header to the headers buffer."""
Georg Brandl24420152008-05-26 16:32:26 +0000462 if self.request_version != 'HTTP/0.9':
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000463 if not hasattr(self, '_headers_buffer'):
464 self._headers_buffer = []
465 self._headers_buffer.append(
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000466 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
Georg Brandl24420152008-05-26 16:32:26 +0000467
468 if keyword.lower() == 'connection':
469 if value.lower() == 'close':
470 self.close_connection = 1
471 elif value.lower() == 'keep-alive':
472 self.close_connection = 0
473
474 def end_headers(self):
475 """Send the blank line ending the MIME headers."""
476 if self.request_version != 'HTTP/0.9':
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000477 self._headers_buffer.append(b"\r\n")
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800478 self.flush_headers()
479
480 def flush_headers(self):
481 if hasattr(self, '_headers_buffer'):
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000482 self.wfile.write(b"".join(self._headers_buffer))
483 self._headers_buffer = []
Georg Brandl24420152008-05-26 16:32:26 +0000484
485 def log_request(self, code='-', size='-'):
486 """Log an accepted request.
487
488 This is called by send_response().
489
490 """
491
492 self.log_message('"%s" %s %s',
493 self.requestline, str(code), str(size))
494
495 def log_error(self, format, *args):
496 """Log an error.
497
498 This is called when a request cannot be fulfilled. By
499 default it passes the message on to log_message().
500
501 Arguments are the same as for log_message().
502
503 XXX This should go to the separate error log.
504
505 """
506
507 self.log_message(format, *args)
508
509 def log_message(self, format, *args):
510 """Log an arbitrary message.
511
512 This is used by all other logging functions. Override
513 it if you have specific logging wishes.
514
515 The first argument, FORMAT, is a format string for the
516 message to be logged. If the format string contains
517 any % escapes requiring parameters, they should be
518 specified as subsequent arguments (it's just like
519 printf!).
520
521 The client host and current date/time are prefixed to
522 every message.
523
524 """
525
526 sys.stderr.write("%s - - [%s] %s\n" %
527 (self.address_string(),
528 self.log_date_time_string(),
529 format%args))
530
531 def version_string(self):
532 """Return the server software version string."""
533 return self.server_version + ' ' + self.sys_version
534
535 def date_time_string(self, timestamp=None):
536 """Return the current date and time formatted for a message header."""
537 if timestamp is None:
538 timestamp = time.time()
539 year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
540 s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
541 self.weekdayname[wd],
542 day, self.monthname[month], year,
543 hh, mm, ss)
544 return s
545
546 def log_date_time_string(self):
547 """Return the current time formatted for logging."""
548 now = time.time()
549 year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
550 s = "%02d/%3s/%04d %02d:%02d:%02d" % (
551 day, self.monthname[month], year, hh, mm, ss)
552 return s
553
    # English day abbreviations used in formatted dates; indexed by the
    # weekday field of a time tuple (index 0 is 'Mon').
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # English month abbreviations; index 0 holds None so the list can be
    # indexed directly with the month number of a time tuple.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
559
560 def address_string(self):
561 """Return the client address formatted for logging.
562
563 This version looks up the full hostname using gethostbyaddr(),
564 and tries to find a name that contains at least one dot.
565
566 """
567
568 host, port = self.client_address[:2]
569 return socket.getfqdn(host)
570
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    # The short message is the default reason phrase of the status line;
    # the long message is the explanation shown on generated error pages.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
648
649
650class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
651
652 """Simple HTTP request handler with GET and HEAD commands.
653
654 This serves files from the current directory and any of its
655 subdirectories. The MIME type for files is determined by
656 calling the .guess_type() method.
657
658 The GET and HEAD requests are identical except that the HEAD
659 request omits the actual contents of the file.
660
661 """
662
663 server_version = "SimpleHTTP/" + __version__
664
665 def do_GET(self):
666 """Serve a GET request."""
667 f = self.send_head()
668 if f:
669 self.copyfile(f, self.wfile)
670 f.close()
671
672 def do_HEAD(self):
673 """Serve a HEAD request."""
674 f = self.send_head()
675 if f:
676 f.close()
677
678 def send_head(self):
679 """Common code for GET and HEAD commands.
680
681 This sends the response code and MIME headers.
682
683 Return value is either a file object (which has to be copied
684 to the outputfile by the caller unless the command was HEAD,
685 and must be closed by the caller under all circumstances), or
686 None, in which case the caller has nothing further to do.
687
688 """
689 path = self.translate_path(self.path)
690 f = None
691 if os.path.isdir(path):
692 if not self.path.endswith('/'):
693 # redirect browser - doing basically what apache does
694 self.send_response(301)
695 self.send_header("Location", self.path + "/")
696 self.end_headers()
697 return None
698 for index in "index.html", "index.htm":
699 index = os.path.join(path, index)
700 if os.path.exists(index):
701 path = index
702 break
703 else:
704 return self.list_directory(path)
705 ctype = self.guess_type(path)
706 try:
707 f = open(path, 'rb')
708 except IOError:
709 self.send_error(404, "File not found")
710 return None
711 self.send_response(200)
712 self.send_header("Content-type", ctype)
713 fs = os.fstat(f.fileno())
714 self.send_header("Content-Length", str(fs[6]))
715 self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
716 self.end_headers()
717 return f
718
719 def list_directory(self, path):
720 """Helper to produce a directory listing (absent index.html).
721
722 Return value is either a file object, or None (indicating an
723 error). In either case, the headers are sent, making the
724 interface the same as for send_head().
725
726 """
727 try:
728 list = os.listdir(path)
729 except os.error:
730 self.send_error(404, "No permission to list directory")
731 return None
732 list.sort(key=lambda a: a.lower())
733 r = []
Georg Brandl1f7fffb2010-10-15 15:57:45 +0000734 displaypath = html.escape(urllib.parse.unquote(self.path))
Ezio Melottica897e92011-11-02 19:33:29 +0200735 enc = sys.getfilesystemencoding()
736 title = 'Directory listing for %s' % displaypath
737 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
738 '"http://www.w3.org/TR/html4/strict.dtd">')
739 r.append('<html>\n<head>')
740 r.append('<meta http-equiv="Content-Type" '
741 'content="text/html; charset=%s">' % enc)
742 r.append('<title>%s</title>\n</head>' % title)
743 r.append('<body>\n<h1>%s</h1>' % title)
744 r.append('<hr>\n<ul>')
Georg Brandl24420152008-05-26 16:32:26 +0000745 for name in list:
746 fullname = os.path.join(path, name)
747 displayname = linkname = name
748 # Append / for directories or @ for symbolic links
749 if os.path.isdir(fullname):
750 displayname = name + "/"
751 linkname = name + "/"
752 if os.path.islink(fullname):
753 displayname = name + "@"
754 # Note: a link to a directory displays with @ and links with /
Ezio Melottica897e92011-11-02 19:33:29 +0200755 r.append('<li><a href="%s">%s</a></li>'
Georg Brandl1f7fffb2010-10-15 15:57:45 +0000756 % (urllib.parse.quote(linkname), html.escape(displayname)))
Ezio Melottica897e92011-11-02 19:33:29 +0200757 r.append('</ul>\n<hr>\n</body>\n</html>\n')
758 encoded = '\n'.join(r).encode(enc)
Georg Brandl24420152008-05-26 16:32:26 +0000759 f = io.BytesIO()
760 f.write(encoded)
761 f.seek(0)
762 self.send_response(200)
763 self.send_header("Content-type", "text/html; charset=%s" % enc)
764 self.send_header("Content-Length", str(len(encoded)))
765 self.end_headers()
766 return f
767
768 def translate_path(self, path):
769 """Translate a /-separated PATH to the local filename syntax.
770
771 Components that mean special things to the local file system
772 (e.g. drive or directory names) are ignored. (XXX They should
773 probably be diagnosed.)
774
775 """
776 # abandon query parameters
777 path = path.split('?',1)[0]
778 path = path.split('#',1)[0]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000779 path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl24420152008-05-26 16:32:26 +0000780 words = path.split('/')
781 words = filter(None, words)
782 path = os.getcwd()
783 for word in words:
784 drive, word = os.path.splitdrive(word)
785 head, word = os.path.split(word)
786 if word in (os.curdir, os.pardir): continue
787 path = os.path.join(path, word)
788 return path
789
790 def copyfile(self, source, outputfile):
791 """Copy all data between two file objects.
792
793 The SOURCE argument is a file object open for reading
794 (or anything with a read() method) and the DESTINATION
795 argument is a file object open for writing (or
796 anything with a write() method).
797
798 The only reason for overriding this would be to change
799 the block size or perhaps to replace newlines by CRLF
800 -- note however that this the default server uses this
801 to copy binary data as well.
802
803 """
804 shutil.copyfileobj(source, outputfile)
805
806 def guess_type(self, path):
807 """Guess the type of a file.
808
809 Argument is a PATH (a filename).
810
811 Return value is a string of the form type/subtype,
812 usable for a MIME Content-type header.
813
814 The default implementation looks the file's extension
815 up in the table self.extensions_map, using application/octet-stream
816 as a default; however it would be permissible (if
817 slow) to look inside the data to make a better guess.
818
819 """
820
821 base, ext = posixpath.splitext(path)
822 if ext in self.extensions_map:
823 return self.extensions_map[ext]
824 ext = ext.lower()
825 if ext in self.extensions_map:
826 return self.extensions_map[ext]
827 else:
828 return self.extensions_map['']
829
    # Extension -> MIME type table consulted by guess_type().  It starts
    # as a copy of the mimetypes module's table and then gets a few
    # overrides, including the '' entry used as the fallback type.
    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
839
840
841# Utilities for CGIHTTPRequestHandler
842
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000843# TODO(gregory.p.smith): Move this into an appropriate library.
844def _url_collapse_path_split(path):
845 """
846 Given a URL path, remove extra '/'s and '.' path elements and collapse
847 any '..' references.
848
849 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
850
851 Returns: A tuple of (head, tail) where tail is everything after the final /
852 and head is everything before it. Head will always start with a '/' and,
853 if it contains anything else, never have a trailing '/'.
854
855 Raises: IndexError if too many '..' occur within the path.
856 """
857 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
858 # path semantics rather than local operating system semantics.
859 path_parts = []
860 for part in path.split('/'):
861 if part == '.':
862 path_parts.append('')
863 else:
864 path_parts.append(part)
865 # Filter out blank non trailing parts before consuming the '..'.
866 path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
867 if path_parts:
868 tail_part = path_parts.pop()
869 else:
870 tail_part = ''
871 head_parts = []
872 for part in path_parts:
873 if part == '..':
874 head_parts.pop()
875 else:
876 head_parts.append(part)
877 if tail_part and tail_part == '..':
878 head_parts.pop()
879 tail_part = ''
880 return ('/' + '/'.join(head_parts), tail_part)
881
882
# Cached uid of the 'nobody' user; filled in by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the uid
    of the 'nobody' account is returned; if no such account exists, one
    more than the largest uid in the password database is used.  The
    value is remembered in the module-level ``nobody`` variable.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
899
900
def executable(path):
    """Return whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
Georg Brandl24420152008-05-26 16:32:26 +0000904
905
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets
        a 501 error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are handed to run_cgi(); everything else is delegated
        to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """

        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    # Normalized URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.path and the (dir, rest) tuple stored in self.cgi_info
        by is_cgi() to locate the script, builds the CGI environment from
        the request, sends a 200 status line and then runs the script --
        via fork/execve when os.fork is available, otherwise through
        subprocess.Popen.

        A 404 or 403 error response is sent instead (and nothing is run)
        when the script is missing, is not a plain file, or is not
        executable.
        """
        path = self.path
        dir, rest = self.cgi_info

        # Descend into nested directories below the CGI directory for as
        # long as the next path component names an existing directory.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter and
        # therefore do not need the executable bit themselves.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(
                            authorization).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect every Accept header value with get_all(), the same way
        # cookies are gathered below, instead of slicing raw header lines
        # at a fixed offset.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it can't be changed.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never fall
                # back into the server loop, so report and exit.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): the fork branch appends the '+'-decoded query,
            # this branch appends the raw one -- confirm this is intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1169
1170
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run an HTTP server with the given handler class until interrupted.

    The server listens on all interfaces, on the port given as the first
    command line argument, or on port 8000 when none is given.  A
    KeyboardInterrupt closes the server and exits with status 0.

    """
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001197
# When executed as a script, run the test server with the simple
# file-serving handler.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)