blob: b79d191fbb41b0cad0b952cc64b3f605c8c5caca [file] [log] [blame]
Georg Brandl24420152008-05-26 16:32:26 +00001"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (which also handles POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arccb0d2d72008-06-18 22:19:22 +000016subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl24420152008-05-26 16:32:26 +000017
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
83__version__ = "0.6"
84
85__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
86
Georg Brandl1f7fffb2010-10-15 15:57:45 +000087import html
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran42713722010-10-03 17:55:45 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
104# Default error message template
105DEFAULT_ERROR_MESSAGE = """\
Senthil Kumaran1b407fe2011-03-20 10:44:30 +0800106<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
107 "http://www.w3.org/TR/html4/strict.dtd">
Ezio Melottica897e92011-11-02 19:33:29 +0200108<html>
Senthil Kumaranb253c9f2011-03-17 16:43:22 +0800109 <head>
Senthil Kumaran1b407fe2011-03-20 10:44:30 +0800110 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
Senthil Kumaranb253c9f2011-03-17 16:43:22 +0800111 <title>Error response</title>
112 </head>
113 <body>
114 <h1>Error response</h1>
115 <p>Error code: %(code)d</p>
116 <p>Message: %(message)s.</p>
117 <p>Error code explanation: %(code)s - %(explain)s.</p>
118 </body>
119</html>
Georg Brandl24420152008-05-26 16:32:26 +0000120"""
121
122DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
123
124def _quote_html(html):
125 return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
126
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the listening socket and remember where it ended up.

        After the base class has bound the socket, ``server_name`` is set
        to the fully qualified name of the bound host and ``server_port``
        to the bound port number.
        """
        socketserver.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        # Only the first two items matter (IPv6 addresses carry more).
        host, port = bound_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
137
138
139class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
140
141 """HTTP request handler base class.
142
143 The following explanation of HTTP serves to guide you through the
144 code as well as to expose any misunderstandings I may have about
145 HTTP (so you don't need to read the code to figure out I'm wrong
146 :-).
147
148 HTTP (HyperText Transfer Protocol) is an extensible protocol on
149 top of a reliable stream transport (e.g. TCP/IP). The protocol
150 recognizes three parts to a request:
151
152 1. One line identifying the request type and path
153 2. An optional set of RFC-822-style headers
154 3. An optional data part
155
156 The headers and data are separated by a blank line.
157
158 The first line of the request has the form
159
160 <command> <path> <version>
161
162 where <command> is a (case-sensitive) keyword such as GET or POST,
163 <path> is a string containing path information for the request,
164 and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
165 <path> is encoded using the URL encoding scheme (using %xx to signify
166 the ASCII character with hex code xx).
167
168 The specification specifies that lines are separated by CRLF but
169 for compatibility with the widest range of clients recommends
170 servers also handle LF. Similarly, whitespace in the request line
171 is treated sensibly (allowing multiple spaces between components
172 and allowing trailing whitespace).
173
174 Similarly, for output, lines ought to be separated by CRLF pairs
175 but most clients grok LF characters just fine.
176
177 If the first line of the request has the form
178
179 <command> <path>
180
181 (i.e. <version> is left out) then this is assumed to be an HTTP
182 0.9 request; this form has no optional headers and data part and
183 the reply consists of just the data.
184
185 The reply form of the HTTP 1.x protocol again has three parts:
186
187 1. One line giving the response code
188 2. An optional set of RFC-822-style headers
189 3. The data
190
191 Again, the headers and data are separated by a blank line.
192
193 The response code line has the form
194
195 <version> <responsecode> <responsestring>
196
197 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
198 <responsecode> is a 3-digit response code indicating success or
199 failure of the request, and <responsestring> is an optional
200 human-readable string explaining what the response code means.
201
202 This server parses the request and the headers, and then calls a
203 function specific to the request type (<command>). Specifically,
204 a request SPAM will be handled by a method do_SPAM(). If no
205 such method exists the server sends an error response to the
206 client. If it exists, it is called with no arguments:
207
208 do_SPAM()
209
210 Note that the request name is case sensitive (i.e. SPAM and spam
211 are different requests).
212
213 The various request details are stored in instance variables:
214
215 - client_address is the client IP address in the form (host,
216 port);
217
218 - command, path and version are the broken-down request line;
219
Barry Warsaw820c1202008-06-12 04:06:45 +0000220 - headers is an instance of email.message.Message (or a derived
Georg Brandl24420152008-05-26 16:32:26 +0000221 class) containing the header information;
222
223 - rfile is a file object open for reading positioned at the
224 start of the optional input data part;
225
226 - wfile is a file object open for writing.
227
228 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
229
230 The first thing to be written must be the response line. Then
231 follow 0 or more header lines, then a blank line, and then the
232 actual data (if any). The meaning of the header lines depends on
233 the command executed by the server; in most cases, when data is
234 returned, there should be at least one header line of the form
235
236 Content-type: <type>/<subtype>
237
238 where <type> and <subtype> should be registered MIME types,
239 e.g. "text/html" or "text/plain".
240
241 """
242
243 # The Python system version, truncated to its first component.
244 sys_version = "Python/" + sys.version.split()[0]
245
246 # The server software version. You may want to override this.
247 # The format is multiple whitespace-separated strings,
248 # where each string is of the form name[/version].
249 server_version = "BaseHTTP/" + __version__
250
251 error_message_format = DEFAULT_ERROR_MESSAGE
252 error_content_type = DEFAULT_ERROR_CONTENT_TYPE
253
254 # The default request version. This only affects responses up until
255 # the point where the request line is parsed, so it mainly decides what
256 # the client gets back when sending a malformed request line.
257 # Most web servers default to HTTP 0.9, i.e. don't send a status line.
258 default_request_version = "HTTP/0.9"
259
260 def parse_request(self):
261 """Parse a request (internal).
262
263 The request should be stored in self.raw_requestline; the results
264 are in self.command, self.path, self.request_version and
265 self.headers.
266
267 Return True for success, False for failure; on failure, an
268 error is sent back.
269
270 """
271 self.command = None # set in case of error on the first line
272 self.request_version = version = self.default_request_version
273 self.close_connection = 1
274 requestline = str(self.raw_requestline, 'iso-8859-1')
275 if requestline[-2:] == '\r\n':
276 requestline = requestline[:-2]
277 elif requestline[-1:] == '\n':
278 requestline = requestline[:-1]
279 self.requestline = requestline
280 words = requestline.split()
281 if len(words) == 3:
282 [command, path, version] = words
283 if version[:5] != 'HTTP/':
284 self.send_error(400, "Bad request version (%r)" % version)
285 return False
286 try:
287 base_version_number = version.split('/', 1)[1]
288 version_number = base_version_number.split(".")
289 # RFC 2145 section 3.1 says there can be only one "." and
290 # - major and minor numbers MUST be treated as
291 # separate integers;
292 # - HTTP/2.4 is a lower version than HTTP/2.13, which in
293 # turn is lower than HTTP/12.3;
294 # - Leading zeros MUST be ignored by recipients.
295 if len(version_number) != 2:
296 raise ValueError
297 version_number = int(version_number[0]), int(version_number[1])
298 except (ValueError, IndexError):
299 self.send_error(400, "Bad request version (%r)" % version)
300 return False
301 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
302 self.close_connection = 0
303 if version_number >= (2, 0):
304 self.send_error(505,
305 "Invalid HTTP Version (%s)" % base_version_number)
306 return False
307 elif len(words) == 2:
308 [command, path] = words
309 self.close_connection = 1
310 if command != 'GET':
311 self.send_error(400,
312 "Bad HTTP/0.9 request type (%r)" % command)
313 return False
314 elif not words:
315 return False
316 else:
317 self.send_error(400, "Bad request syntax (%r)" % requestline)
318 return False
319 self.command, self.path, self.request_version = command, path, version
320
321 # Examine the headers and look for a Connection directive.
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000322 try:
323 self.headers = http.client.parse_headers(self.rfile,
324 _class=self.MessageClass)
325 except http.client.LineTooLong:
326 self.send_error(400, "Line too long")
327 return False
Georg Brandl24420152008-05-26 16:32:26 +0000328
329 conntype = self.headers.get('Connection', "")
330 if conntype.lower() == 'close':
331 self.close_connection = 1
332 elif (conntype.lower() == 'keep-alive' and
333 self.protocol_version >= "HTTP/1.1"):
334 self.close_connection = 0
Senthil Kumaran0f476d42010-09-30 06:09:18 +0000335 # Examine the headers and look for an Expect directive
336 expect = self.headers.get('Expect', "")
337 if (expect.lower() == "100-continue" and
338 self.protocol_version >= "HTTP/1.1" and
339 self.request_version >= "HTTP/1.1"):
340 if not self.handle_expect_100():
341 return False
342 return True
343
344 def handle_expect_100(self):
345 """Decide what to do with an "Expect: 100-continue" header.
346
347 If the client is expecting a 100 Continue response, we must
348 respond with either a 100 Continue or a final response before
349 waiting for the request body. The default is to always respond
350 with a 100 Continue. You can behave differently (for example,
351 reject unauthorized requests) by overriding this method.
352
353 This method should either return True (possibly after sending
354 a 100 Continue response) or send an error response and return
355 False.
356
357 """
358 self.send_response_only(100)
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800359 self.flush_headers()
Georg Brandl24420152008-05-26 16:32:26 +0000360 return True
361
362 def handle_one_request(self):
363 """Handle a single HTTP request.
364
365 You normally don't need to override this method; see the class
366 __doc__ string for information on how to handle specific HTTP
367 commands such as GET and POST.
368
369 """
Kristján Valur Jónsson985fc6a2009-07-01 10:01:31 +0000370 try:
Antoine Pitrouc4924372010-12-16 16:48:36 +0000371 self.raw_requestline = self.rfile.readline(65537)
372 if len(self.raw_requestline) > 65536:
373 self.requestline = ''
374 self.request_version = ''
375 self.command = ''
376 self.send_error(414)
377 return
Kristján Valur Jónsson985fc6a2009-07-01 10:01:31 +0000378 if not self.raw_requestline:
379 self.close_connection = 1
380 return
381 if not self.parse_request():
382 # An error code has been sent, just exit
383 return
384 mname = 'do_' + self.command
385 if not hasattr(self, mname):
386 self.send_error(501, "Unsupported method (%r)" % self.command)
387 return
388 method = getattr(self, mname)
389 method()
390 self.wfile.flush() #actually send the response if not already done.
391 except socket.timeout as e:
392 #a read or a write timed out. Discard this connection
393 self.log_error("Request timed out: %r", e)
Georg Brandl24420152008-05-26 16:32:26 +0000394 self.close_connection = 1
395 return
Georg Brandl24420152008-05-26 16:32:26 +0000396
397 def handle(self):
398 """Handle multiple requests if necessary."""
399 self.close_connection = 1
400
401 self.handle_one_request()
402 while not self.close_connection:
403 self.handle_one_request()
404
405 def send_error(self, code, message=None):
406 """Send and log an error reply.
407
408 Arguments are the error code, and a detailed message.
409 The detailed message defaults to the short entry matching the
410 response code.
411
412 This sends an error response (so it must be called before any
413 output has been generated), logs the error, and finally sends
414 a piece of HTML explaining the error to the user.
415
416 """
417
418 try:
419 shortmsg, longmsg = self.responses[code]
420 except KeyError:
421 shortmsg, longmsg = '???', '???'
422 if message is None:
423 message = shortmsg
424 explain = longmsg
425 self.log_error("code %d, message %s", code, message)
426 # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
427 content = (self.error_message_format %
428 {'code': code, 'message': _quote_html(message), 'explain': explain})
429 self.send_response(code, message)
430 self.send_header("Content-Type", self.error_content_type)
431 self.send_header('Connection', 'close')
432 self.end_headers()
433 if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
434 self.wfile.write(content.encode('UTF-8', 'replace'))
435
436 def send_response(self, code, message=None):
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800437 """Add the response header to the headers buffer and log the
438 response code.
Georg Brandl24420152008-05-26 16:32:26 +0000439
440 Also send two standard headers with the server software
441 version and the current date.
442
443 """
444 self.log_request(code)
Senthil Kumaran0f476d42010-09-30 06:09:18 +0000445 self.send_response_only(code, message)
446 self.send_header('Server', self.version_string())
447 self.send_header('Date', self.date_time_string())
448
449 def send_response_only(self, code, message=None):
450 """Send the response header only."""
Georg Brandl24420152008-05-26 16:32:26 +0000451 if message is None:
452 if code in self.responses:
453 message = self.responses[code][0]
454 else:
455 message = ''
456 if self.request_version != 'HTTP/0.9':
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800457 if not hasattr(self, '_headers_buffer'):
458 self._headers_buffer = []
459 self._headers_buffer.append(("%s %d %s\r\n" %
460 (self.protocol_version, code, message)).encode(
461 'latin-1', 'strict'))
Georg Brandl24420152008-05-26 16:32:26 +0000462
463 def send_header(self, keyword, value):
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800464 """Send a MIME header to the headers buffer."""
Georg Brandl24420152008-05-26 16:32:26 +0000465 if self.request_version != 'HTTP/0.9':
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000466 if not hasattr(self, '_headers_buffer'):
467 self._headers_buffer = []
468 self._headers_buffer.append(
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000469 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
Georg Brandl24420152008-05-26 16:32:26 +0000470
471 if keyword.lower() == 'connection':
472 if value.lower() == 'close':
473 self.close_connection = 1
474 elif value.lower() == 'keep-alive':
475 self.close_connection = 0
476
477 def end_headers(self):
478 """Send the blank line ending the MIME headers."""
479 if self.request_version != 'HTTP/0.9':
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000480 self._headers_buffer.append(b"\r\n")
Senthil Kumaranc7ae19b2011-05-09 23:25:02 +0800481 self.flush_headers()
482
483 def flush_headers(self):
484 if hasattr(self, '_headers_buffer'):
Senthil Kumarane4dad4f2010-11-21 14:36:14 +0000485 self.wfile.write(b"".join(self._headers_buffer))
486 self._headers_buffer = []
Georg Brandl24420152008-05-26 16:32:26 +0000487
488 def log_request(self, code='-', size='-'):
489 """Log an accepted request.
490
491 This is called by send_response().
492
493 """
494
495 self.log_message('"%s" %s %s',
496 self.requestline, str(code), str(size))
497
498 def log_error(self, format, *args):
499 """Log an error.
500
501 This is called when a request cannot be fulfilled. By
502 default it passes the message on to log_message().
503
504 Arguments are the same as for log_message().
505
506 XXX This should go to the separate error log.
507
508 """
509
510 self.log_message(format, *args)
511
512 def log_message(self, format, *args):
513 """Log an arbitrary message.
514
515 This is used by all other logging functions. Override
516 it if you have specific logging wishes.
517
518 The first argument, FORMAT, is a format string for the
519 message to be logged. If the format string contains
520 any % escapes requiring parameters, they should be
521 specified as subsequent arguments (it's just like
522 printf!).
523
524 The client host and current date/time are prefixed to
525 every message.
526
527 """
528
529 sys.stderr.write("%s - - [%s] %s\n" %
530 (self.address_string(),
531 self.log_date_time_string(),
532 format%args))
533
534 def version_string(self):
535 """Return the server software version string."""
536 return self.server_version + ' ' + self.sys_version
537
538 def date_time_string(self, timestamp=None):
539 """Return the current date and time formatted for a message header."""
540 if timestamp is None:
541 timestamp = time.time()
542 year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
543 s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
544 self.weekdayname[wd],
545 day, self.monthname[month], year,
546 hh, mm, ss)
547 return s
548
549 def log_date_time_string(self):
550 """Return the current time formatted for logging."""
551 now = time.time()
552 year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
553 s = "%02d/%3s/%04d %02d:%02d:%02d" % (
554 day, self.monthname[month], year, hh, mm, ss)
555 return s
556
557 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
558
559 monthname = [None,
560 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
561 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
562
563 def address_string(self):
564 """Return the client address formatted for logging.
565
566 This version looks up the full hostname using gethostbyaddr(),
567 and tries to find a name that contains at least one dot.
568
569 """
570
571 host, port = self.client_address[:2]
572 return socket.getfqdn(host)
573
574 # Essentially static class variables
575
576 # The version of the HTTP protocol we support.
577 # Set this to HTTP/1.1 to enable automatic keepalive
578 protocol_version = "HTTP/1.0"
579
Barry Warsaw820c1202008-06-12 04:06:45 +0000580 # MessageClass used to parse headers
Barry Warsaw820c1202008-06-12 04:06:45 +0000581 MessageClass = http.client.HTTPMessage
Georg Brandl24420152008-05-26 16:32:26 +0000582
583 # Table mapping response codes to messages; entries have the
584 # form {code: (shortmessage, longmessage)}.
585 # See RFC 2616.
586 responses = {
587 100: ('Continue', 'Request received, please continue'),
588 101: ('Switching Protocols',
589 'Switching to new protocol; obey Upgrade header'),
590
591 200: ('OK', 'Request fulfilled, document follows'),
592 201: ('Created', 'Document created, URL follows'),
593 202: ('Accepted',
594 'Request accepted, processing continues off-line'),
595 203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
596 204: ('No Content', 'Request fulfilled, nothing follows'),
597 205: ('Reset Content', 'Clear input form for further input.'),
598 206: ('Partial Content', 'Partial content follows.'),
599
600 300: ('Multiple Choices',
601 'Object has several resources -- see URI list'),
602 301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
603 302: ('Found', 'Object moved temporarily -- see URI list'),
604 303: ('See Other', 'Object moved -- see Method and URL list'),
605 304: ('Not Modified',
606 'Document has not changed since given time'),
607 305: ('Use Proxy',
608 'You must use proxy specified in Location to access this '
609 'resource.'),
610 307: ('Temporary Redirect',
611 'Object moved temporarily -- see URI list'),
612
613 400: ('Bad Request',
614 'Bad request syntax or unsupported method'),
615 401: ('Unauthorized',
616 'No permission -- see authorization schemes'),
617 402: ('Payment Required',
618 'No payment -- see charging schemes'),
619 403: ('Forbidden',
620 'Request forbidden -- authorization will not help'),
621 404: ('Not Found', 'Nothing matches the given URI'),
622 405: ('Method Not Allowed',
Senthil Kumaran7aa26212010-02-22 11:00:50 +0000623 'Specified method is invalid for this resource.'),
Georg Brandl24420152008-05-26 16:32:26 +0000624 406: ('Not Acceptable', 'URI not available in preferred format.'),
625 407: ('Proxy Authentication Required', 'You must authenticate with '
626 'this proxy before proceeding.'),
627 408: ('Request Timeout', 'Request timed out; try again later.'),
628 409: ('Conflict', 'Request conflict.'),
629 410: ('Gone',
630 'URI no longer exists and has been permanently removed.'),
631 411: ('Length Required', 'Client must specify Content-Length.'),
632 412: ('Precondition Failed', 'Precondition in headers is false.'),
633 413: ('Request Entity Too Large', 'Entity is too large.'),
634 414: ('Request-URI Too Long', 'URI is too long.'),
635 415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
636 416: ('Requested Range Not Satisfiable',
637 'Cannot satisfy request range.'),
638 417: ('Expectation Failed',
639 'Expect condition could not be satisfied.'),
640
641 500: ('Internal Server Error', 'Server got itself in trouble'),
642 501: ('Not Implemented',
643 'Server does not support this operation'),
644 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
645 503: ('Service Unavailable',
646 'The server cannot process the request due to a high load'),
647 504: ('Gateway Timeout',
648 'The gateway server did not receive a timely response'),
649 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
650 }
651
652
653class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
654
655 """Simple HTTP request handler with GET and HEAD commands.
656
657 This serves files from the current directory and any of its
658 subdirectories. The MIME type for files is determined by
659 calling the .guess_type() method.
660
661 The GET and HEAD requests are identical except that the HEAD
662 request omits the actual contents of the file.
663
664 """
665
666 server_version = "SimpleHTTP/" + __version__
667
668 def do_GET(self):
669 """Serve a GET request."""
670 f = self.send_head()
671 if f:
672 self.copyfile(f, self.wfile)
673 f.close()
674
675 def do_HEAD(self):
676 """Serve a HEAD request."""
677 f = self.send_head()
678 if f:
679 f.close()
680
681 def send_head(self):
682 """Common code for GET and HEAD commands.
683
684 This sends the response code and MIME headers.
685
686 Return value is either a file object (which has to be copied
687 to the outputfile by the caller unless the command was HEAD,
688 and must be closed by the caller under all circumstances), or
689 None, in which case the caller has nothing further to do.
690
691 """
692 path = self.translate_path(self.path)
693 f = None
694 if os.path.isdir(path):
695 if not self.path.endswith('/'):
696 # redirect browser - doing basically what apache does
697 self.send_response(301)
698 self.send_header("Location", self.path + "/")
699 self.end_headers()
700 return None
701 for index in "index.html", "index.htm":
702 index = os.path.join(path, index)
703 if os.path.exists(index):
704 path = index
705 break
706 else:
707 return self.list_directory(path)
708 ctype = self.guess_type(path)
709 try:
710 f = open(path, 'rb')
711 except IOError:
712 self.send_error(404, "File not found")
713 return None
714 self.send_response(200)
715 self.send_header("Content-type", ctype)
716 fs = os.fstat(f.fileno())
717 self.send_header("Content-Length", str(fs[6]))
718 self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
719 self.end_headers()
720 return f
721
722 def list_directory(self, path):
723 """Helper to produce a directory listing (absent index.html).
724
725 Return value is either a file object, or None (indicating an
726 error). In either case, the headers are sent, making the
727 interface the same as for send_head().
728
729 """
730 try:
731 list = os.listdir(path)
732 except os.error:
733 self.send_error(404, "No permission to list directory")
734 return None
735 list.sort(key=lambda a: a.lower())
736 r = []
Georg Brandl1f7fffb2010-10-15 15:57:45 +0000737 displaypath = html.escape(urllib.parse.unquote(self.path))
Ezio Melottica897e92011-11-02 19:33:29 +0200738 enc = sys.getfilesystemencoding()
739 title = 'Directory listing for %s' % displaypath
740 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
741 '"http://www.w3.org/TR/html4/strict.dtd">')
742 r.append('<html>\n<head>')
743 r.append('<meta http-equiv="Content-Type" '
744 'content="text/html; charset=%s">' % enc)
745 r.append('<title>%s</title>\n</head>' % title)
746 r.append('<body>\n<h1>%s</h1>' % title)
747 r.append('<hr>\n<ul>')
Georg Brandl24420152008-05-26 16:32:26 +0000748 for name in list:
749 fullname = os.path.join(path, name)
750 displayname = linkname = name
751 # Append / for directories or @ for symbolic links
752 if os.path.isdir(fullname):
753 displayname = name + "/"
754 linkname = name + "/"
755 if os.path.islink(fullname):
756 displayname = name + "@"
757 # Note: a link to a directory displays with @ and links with /
Ezio Melottica897e92011-11-02 19:33:29 +0200758 r.append('<li><a href="%s">%s</a></li>'
Georg Brandl1f7fffb2010-10-15 15:57:45 +0000759 % (urllib.parse.quote(linkname), html.escape(displayname)))
Ezio Melottica897e92011-11-02 19:33:29 +0200760 r.append('</ul>\n<hr>\n</body>\n</html>\n')
761 encoded = '\n'.join(r).encode(enc)
Georg Brandl24420152008-05-26 16:32:26 +0000762 f = io.BytesIO()
763 f.write(encoded)
764 f.seek(0)
765 self.send_response(200)
766 self.send_header("Content-type", "text/html; charset=%s" % enc)
767 self.send_header("Content-Length", str(len(encoded)))
768 self.end_headers()
769 return f
770
771 def translate_path(self, path):
772 """Translate a /-separated PATH to the local filename syntax.
773
774 Components that mean special things to the local file system
775 (e.g. drive or directory names) are ignored. (XXX They should
776 probably be diagnosed.)
777
778 """
779 # abandon query parameters
780 path = path.split('?',1)[0]
781 path = path.split('#',1)[0]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000782 path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl24420152008-05-26 16:32:26 +0000783 words = path.split('/')
784 words = filter(None, words)
785 path = os.getcwd()
786 for word in words:
787 drive, word = os.path.splitdrive(word)
788 head, word = os.path.split(word)
789 if word in (os.curdir, os.pardir): continue
790 path = os.path.join(path, word)
791 return path
792
793 def copyfile(self, source, outputfile):
794 """Copy all data between two file objects.
795
796 The SOURCE argument is a file object open for reading
797 (or anything with a read() method) and the DESTINATION
798 argument is a file object open for writing (or
799 anything with a write() method).
800
801 The only reason for overriding this would be to change
802 the block size or perhaps to replace newlines by CRLF
803 -- note however that this the default server uses this
804 to copy binary data as well.
805
806 """
807 shutil.copyfileobj(source, outputfile)
808
809 def guess_type(self, path):
810 """Guess the type of a file.
811
812 Argument is a PATH (a filename).
813
814 Return value is a string of the form type/subtype,
815 usable for a MIME Content-type header.
816
817 The default implementation looks the file's extension
818 up in the table self.extensions_map, using application/octet-stream
819 as a default; however it would be permissible (if
820 slow) to look inside the data to make a better guess.
821
822 """
823
824 base, ext = posixpath.splitext(path)
825 if ext in self.extensions_map:
826 return self.extensions_map[ext]
827 ext = ext.lower()
828 if ext in self.extensions_map:
829 return self.extensions_map[ext]
830 else:
831 return self.extensions_map['']
832
833 if not mimetypes.inited:
834 mimetypes.init() # try to read system mime.types
835 extensions_map = mimetypes.types_map.copy()
836 extensions_map.update({
837 '': 'application/octet-stream', # Default
838 '.py': 'text/plain',
839 '.c': 'text/plain',
840 '.h': 'text/plain',
841 })
842
843
844# Utilities for CGIHTTPRequestHandler
845
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000846# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalize a URL path and split it at its final '/'.

    Extra '/'s and '.' path elements are dropped and every '..' element
    removes the element before it, akin to RFC-2396 5.2 step 6.  This is
    similar to os.path.split(os.path.normpath(path)) but follows URL path
    semantics rather than local operating system semantics.

    Returns: A tuple of (head, tail).  Tail is everything after the final
    '/'; a tail of '.' or '..' is reported as ''.  Head always starts with
    a '/' and, if it contains anything else, never has a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # '.' refers to the current level, so treat it like an empty element.
    elements = ['' if piece == '.' else piece for piece in path.split('/')]
    # str.split() always yields at least one item, so the last element
    # (the tail) can be taken off unconditionally.
    tail = elements.pop()
    head = []
    for piece in elements:
        if not piece:
            # Blank elements come from repeated '/' and from '.'.
            continue
        if piece == '..':
            head.pop()
        else:
            head.append(piece)
    if tail == '..':
        head.pop()
        tail = ''
    return ('/' + '/'.join(head), tail)
884
885
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine returning the uid of the 'nobody' account.

    The value is computed once and remembered in the module-level
    ``nobody`` variable.  When the pwd module cannot be imported, -1 is
    returned and nothing is cached.  When there is no account named
    'nobody', one more than the largest uid in the password database is
    used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    return nobody
902
903
def executable(path):
    """Report whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
Georg Brandl24420152008-05-26 16:32:26 +0000907
908
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """

        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is based only on the file extension (.py or .pyw,
        compared case-insensitively).

        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Expects is_cgi() to have stored (dir, rest) in self.cgi_info.
        The script is located and validated (404/403 errors are sent
        when it is missing, not a plain file, or not executable), a CGI
        environment is built from the request, a 200 status line is
        sent, and the script is run: via fork/execve when os.fork
        exists, via subprocess otherwise.  Returns None in every case.

        """
        path = self.path
        cgi_dir, rest = self.cgi_info

        # Extend the CGI directory over any further path components
        # that name real directories on disk.
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Python scripts on non-fork platforms are started through the
        # interpreter, so they do not need the executable bit.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            auth_fields = authorization.split()
            if len(auth_fields) == 2:
                import base64
                import binascii
                scheme, credentials = auth_fields
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    try:
                        decoded = base64.decodebytes(
                            credentials.encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Only the first ':' separates the user name from
                        # the password; the password may itself contain
                        # further colons.
                        userpass = decoded.split(':', 1)
                        if len(userpass) == 2:
                            env['REMOTE_USER'] = userpass[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect every Accept header value.  get_all() returns the
        # header values themselves, matching how cookies are gathered
        # below.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the
        # script itself cannot choose a different status code.
        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot be
                    # changed.
                    pass
                # Wire the request socket to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # fall back into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: read no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1172
1173
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run an HTTP server using the given handler and server classes.

    The server listens on all interfaces, on the port given as the
    first command line argument or on port 8000 when none is given.
    It serves until interrupted from the keyboard, at which point the
    server is closed and the process exits with status 0.

    """
    cli_args = sys.argv[1:]
    port = int(sys.argv[1]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001200
# When executed as a script, start the test server with
# SimpleHTTPRequestHandler (port 8000, or the first command line
# argument).
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)