Blame - Lib/http/server.py - platform/external/python/cpython2

blob: 4fa58a259e391d4899fac524d3f144530644831d [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
				3	Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
				4	SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
				5	and CGIHTTPRequestHandler for GET and POST requests to CGI scripts.
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	16	subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	17
				18	In all cases, the implementation is intentionally naive -- all
				19	requests are executed synchronously.
				20
				21	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				22	-- it may execute arbitrary Python code or external programs.
				23
				24	Note that status code 200 is sent prior to execution of a CGI script, so
				25	scripts cannot send other status codes such as 302 (redirect).
				26
				27	XXX To do:
				28
				29	- log requests even later (to capture byte count)
				30	- log user-agent header and other interesting goodies
				31	- send error log to separate file
				32	"""
				33
				34
				35	# See also:
				36	#
				37	# HTTP Working Group T. Berners-Lee
				38	# INTERNET-DRAFT R. T. Fielding
				39	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				40	# Expires September 8, 1995 March 8, 1995
				41	#
				42	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				43	#
				44	# and
				45	#
				46	# Network Working Group R. Fielding
				47	# Request for Comments: 2616 et al
				48	# Obsoletes: 2068 June 1999
				49	# Category: Standards Track
				50	#
				51	# URL: http://www.faqs.org/rfcs/rfc2616.html
				52
				53	# Log files
				54	# ---------
				55	#
				56	# Here's a quote from the NCSA httpd docs about log file format.
				57	#
				58	# | The logfile format is as follows. Each line consists of:
				59	# |
				60	# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
				61	# |
				62	# | host: Either the DNS name or the IP number of the remote client
				63	# | rfc931: Any information returned by identd for this person,
				64	# | - otherwise.
				65	# | authuser: If user sent a userid for authentication, the user name,
				66	# | - otherwise.
				67	# | DD: Day
				68	# | Mon: Month (calendar name)
				69	# | YYYY: Year
				70	# | hh: hour (24-hour format, the machine's timezone)
				71	# | mm: minutes
				72	# | ss: seconds
				73	# | request: The first line of the HTTP request as sent by the client.
				74	# | ddd: the status code returned by the server, - if not available.
				75	# | bbbb: the total number of bytes sent,
				76	# | not including the HTTP/1.0 header, - if not available
				77	# |
				78	# | You can determine the name of the file accessed through request.
				79	#
				80	# (Actually, the latter is only true if you know the server configuration
				81	# at the time the request was made!)
				82
# Version of this module; the request handler classes append it to their
# ``server_version`` strings (e.g. "BaseHTTP/0.6").
__version__ = "0.6"

# Names exported by ``from <this module> import *``.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				86
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	87	import cgi
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	88	import email.message
				89	import email.parser
Jeremy Hylton	914ab45	2009-03-27 17:16:06 +0000	[diff] [blame]	90	import http.client
				91	import io
				92	import mimetypes
				93	import os
				94	import posixpath
				95	import select
				96	import shutil
				97	import socket # For gethostbyaddr()
				98	import socketserver
				99	import sys
				100	import time
				101	import urllib.parse
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	102
# Default error message template.  BaseHTTPRequestHandler.send_error()
# interpolates it with a mapping that provides the keys 'code' (the numeric
# status), 'message' (the HTML-quoted short message) and 'explain' (the long
# explanation taken from the responses table).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent along with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				117
				118	def _quote_html(html):
				119	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				120
				121	class HTTPServer(socketserver.TCPServer):
				122
				123	allow_reuse_address = 1 # Seems to make sense in testing environment
				124
				125	def server_bind(self):
				126	"""Override server_bind to store the server name."""
				127	socketserver.TCPServer.server_bind(self)
				128	host, port = self.socket.getsockname()[:2]
				129	self.server_name = socket.getfqdn(host)
				130	self.server_port = port
				131
				132
				133	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				134
				135	"""HTTP request handler base class.
				136
				137	The following explanation of HTTP serves to guide you through the
				138	code as well as to expose any misunderstandings I may have about
				139	HTTP (so you don't need to read the code to figure out I'm wrong
				140	:-).
				141
				142	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				143	top of a reliable stream transport (e.g. TCP/IP). The protocol
				144	recognizes three parts to a request:
				145
				146	1. One line identifying the request type and path
				147	2. An optional set of RFC-822-style headers
				148	3. An optional data part
				149
				150	The headers and data are separated by a blank line.
				151
				152	The first line of the request has the form
				153
				154	<command> <path> <version>
				155
				156	where <command> is a (case-sensitive) keyword such as GET or POST,
				157	<path> is a string containing path information for the request,
				158	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				159	<path> is encoded using the URL encoding scheme (using %xx to signify
				160	the ASCII character with hex code xx).
				161
				162	The specification specifies that lines are separated by CRLF but
				163	for compatibility with the widest range of clients recommends
				164	servers also handle LF. Similarly, whitespace in the request line
				165	is treated sensibly (allowing multiple spaces between components
				166	and allowing trailing whitespace).
				167
				168	Similarly, for output, lines ought to be separated by CRLF pairs
				169	but most clients grok LF characters just fine.
				170
				171	If the first line of the request has the form
				172
				173	<command> <path>
				174
				175	(i.e. <version> is left out) then this is assumed to be an HTTP
				176	0.9 request; this form has no optional headers and data part and
				177	the reply consists of just the data.
				178
				179	The reply form of the HTTP 1.x protocol again has three parts:
				180
				181	1. One line giving the response code
				182	2. An optional set of RFC-822-style headers
				183	3. The data
				184
				185	Again, the headers and data are separated by a blank line.
				186
				187	The response code line has the form
				188
				189	<version> <responsecode> <responsestring>
				190
				191	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				192	<responsecode> is a 3-digit response code indicating success or
				193	failure of the request, and <responsestring> is an optional
				194	human-readable string explaining what the response code means.
				195
				196	This server parses the request and the headers, and then calls a
				197	function specific to the request type (<command>). Specifically,
				198	a request SPAM will be handled by a method do_SPAM(). If no
				199	such method exists the server sends an error response to the
				200	client. If it exists, it is called with no arguments:
				201
				202	do_SPAM()
				203
				204	Note that the request name is case sensitive (i.e. SPAM and spam
				205	are different requests).
				206
				207	The various request details are stored in instance variables:
				208
				209	- client_address is the client IP address in the form (host,
				210	port);
				211
				212	- command, path and version are the broken-down request line;
				213
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	214	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	215	class) containing the header information;
				216
				217	- rfile is a file object open for reading positioned at the
				218	start of the optional input data part;
				219
				220	- wfile is a file object open for writing.
				221
				222	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				223
				224	The first thing to be written must be the response line. Then
				225	follow 0 or more header lines, then a blank line, and then the
				226	actual data (if any). The meaning of the header lines depends on
				227	the command executed by the server; in most cases, when data is
				228	returned, there should be at least one header line of the form
				229
				230	Content-type: <type>/<subtype>
				231
				232	where <type> and <subtype> should be registered MIME types,
				233	e.g. "text/html" or "text/plain".
				234
				235	"""
				236
				237	# The Python system version, truncated to its first component.
				238	sys_version = "Python/" + sys.version.split()[0]
				239
				240	# The server software version. You may want to override this.
				241	# The format is multiple whitespace-separated strings,
				242	# where each string is of the form name[/version].
				243	server_version = "BaseHTTP/" + __version__
				244
				245	error_message_format = DEFAULT_ERROR_MESSAGE
				246	error_content_type = DEFAULT_ERROR_CONTENT_TYPE
				247
				248	# The default request version. This only affects responses up until
				249	# the point where the request line is parsed, so it mainly decides what
				250	# the client gets back when sending a malformed request line.
				251	# Most web servers default to HTTP 0.9, i.e. don't send a status line.
				252	default_request_version = "HTTP/0.9"
				253
				254	def parse_request(self):
				255	"""Parse a request (internal).
				256
				257	The request should be stored in self.raw_requestline; the results
				258	are in self.command, self.path, self.request_version and
				259	self.headers.
				260
				261	Return True for success, False for failure; on failure, an
				262	error is sent back.
				263
				264	"""
				265	self.command = None # set in case of error on the first line
				266	self.request_version = version = self.default_request_version
				267	self.close_connection = 1
				268	requestline = str(self.raw_requestline, 'iso-8859-1')
				269	if requestline[-2:] == '\r\n':
				270	requestline = requestline[:-2]
				271	elif requestline[-1:] == '\n':
				272	requestline = requestline[:-1]
				273	self.requestline = requestline
				274	words = requestline.split()
				275	if len(words) == 3:
				276	[command, path, version] = words
				277	if version[:5] != 'HTTP/':
				278	self.send_error(400, "Bad request version (%r)" % version)
				279	return False
				280	try:
				281	base_version_number = version.split('/', 1)[1]
				282	version_number = base_version_number.split(".")
				283	# RFC 2145 section 3.1 says there can be only one "." and
				284	# - major and minor numbers MUST be treated as
				285	# separate integers;
				286	# - HTTP/2.4 is a lower version than HTTP/2.13, which in
				287	# turn is lower than HTTP/12.3;
				288	# - Leading zeros MUST be ignored by recipients.
				289	if len(version_number) != 2:
				290	raise ValueError
				291	version_number = int(version_number[0]), int(version_number[1])
				292	except (ValueError, IndexError):
				293	self.send_error(400, "Bad request version (%r)" % version)
				294	return False
				295	if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
				296	self.close_connection = 0
				297	if version_number >= (2, 0):
				298	self.send_error(505,
				299	"Invalid HTTP Version (%s)" % base_version_number)
				300	return False
				301	elif len(words) == 2:
				302	[command, path] = words
				303	self.close_connection = 1
				304	if command != 'GET':
				305	self.send_error(400,
				306	"Bad HTTP/0.9 request type (%r)" % command)
				307	return False
				308	elif not words:
				309	return False
				310	else:
				311	self.send_error(400, "Bad request syntax (%r)" % requestline)
				312	return False
				313	self.command, self.path, self.request_version = command, path, version
				314
				315	# Examine the headers and look for a Connection directive.
Jeremy Hylton	98eb6c2	2009-03-27 18:31:36 +0000	[diff] [blame]	316	self.headers = http.client.parse_headers(self.rfile,
				317	_class=self.MessageClass)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	318
				319	conntype = self.headers.get('Connection', "")
				320	if conntype.lower() == 'close':
				321	self.close_connection = 1
				322	elif (conntype.lower() == 'keep-alive' and
				323	self.protocol_version >= "HTTP/1.1"):
				324	self.close_connection = 0
Senthil Kumaran	0f476d4	2010-09-30 06:09:18 +0000	[diff] [blame]	325	# Examine the headers and look for an Expect directive
				326	expect = self.headers.get('Expect', "")
				327	if (expect.lower() == "100-continue" and
				328	self.protocol_version >= "HTTP/1.1" and
				329	self.request_version >= "HTTP/1.1"):
				330	if not self.handle_expect_100():
				331	return False
				332	return True
				333
				334	def handle_expect_100(self):
				335	"""Decide what to do with an "Expect: 100-continue" header.
				336
				337	If the client is expecting a 100 Continue response, we must
				338	respond with either a 100 Continue or a final response before
				339	waiting for the request body. The default is to always respond
				340	with a 100 Continue. You can behave differently (for example,
				341	reject unauthorized requests) by overriding this method.
				342
				343	This method should either return True (possibly after sending
				344	a 100 Continue response) or send an error response and return
				345	False.
				346
				347	"""
				348	self.send_response_only(100)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	349	return True
				350
				351	def handle_one_request(self):
				352	"""Handle a single HTTP request.
				353
				354	You normally don't need to override this method; see the class
				355	__doc__ string for information on how to handle specific HTTP
				356	commands such as GET and POST.
				357
				358	"""
Kristján Valur Jónsson	985fc6a	2009-07-01 10:01:31 +0000	[diff] [blame]	359	try:
				360	self.raw_requestline = self.rfile.readline()
				361	if not self.raw_requestline:
				362	self.close_connection = 1
				363	return
				364	if not self.parse_request():
				365	# An error code has been sent, just exit
				366	return
				367	mname = 'do_' + self.command
				368	if not hasattr(self, mname):
				369	self.send_error(501, "Unsupported method (%r)" % self.command)
				370	return
				371	method = getattr(self, mname)
				372	method()
				373	self.wfile.flush() #actually send the response if not already done.
				374	except socket.timeout as e:
				375	#a read or a write timed out. Discard this connection
				376	self.log_error("Request timed out: %r", e)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	377	self.close_connection = 1
				378	return
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	379
				380	def handle(self):
				381	"""Handle multiple requests if necessary."""
				382	self.close_connection = 1
				383
				384	self.handle_one_request()
				385	while not self.close_connection:
				386	self.handle_one_request()
				387
				388	def send_error(self, code, message=None):
				389	"""Send and log an error reply.
				390
				391	Arguments are the error code, and a detailed message.
				392	The detailed message defaults to the short entry matching the
				393	response code.
				394
				395	This sends an error response (so it must be called before any
				396	output has been generated), logs the error, and finally sends
				397	a piece of HTML explaining the error to the user.
				398
				399	"""
				400
				401	try:
				402	shortmsg, longmsg = self.responses[code]
				403	except KeyError:
				404	shortmsg, longmsg = '???', '???'
				405	if message is None:
				406	message = shortmsg
				407	explain = longmsg
				408	self.log_error("code %d, message %s", code, message)
				409	# using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
				410	content = (self.error_message_format %
				411	{'code': code, 'message': _quote_html(message), 'explain': explain})
				412	self.send_response(code, message)
				413	self.send_header("Content-Type", self.error_content_type)
				414	self.send_header('Connection', 'close')
				415	self.end_headers()
				416	if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
				417	self.wfile.write(content.encode('UTF-8', 'replace'))
				418
				419	def send_response(self, code, message=None):
				420	"""Send the response header and log the response code.
				421
				422	Also send two standard headers with the server software
				423	version and the current date.
				424
				425	"""
				426	self.log_request(code)
Senthil Kumaran	0f476d4	2010-09-30 06:09:18 +0000	[diff] [blame]	427	self.send_response_only(code, message)
				428	self.send_header('Server', self.version_string())
				429	self.send_header('Date', self.date_time_string())
				430
				431	def send_response_only(self, code, message=None):
				432	"""Send the response header only."""
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	433	if message is None:
				434	if code in self.responses:
				435	message = self.responses[code][0]
				436	else:
				437	message = ''
				438	if self.request_version != 'HTTP/0.9':
				439	self.wfile.write(("%s %d %s\r\n" %
				440	(self.protocol_version, code, message)).encode('ASCII', 'strict'))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	441
				442	def send_header(self, keyword, value):
				443	"""Send a MIME header."""
				444	if self.request_version != 'HTTP/0.9':
				445	self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))
				446
				447	if keyword.lower() == 'connection':
				448	if value.lower() == 'close':
				449	self.close_connection = 1
				450	elif value.lower() == 'keep-alive':
				451	self.close_connection = 0
				452
				453	def end_headers(self):
				454	"""Send the blank line ending the MIME headers."""
				455	if self.request_version != 'HTTP/0.9':
				456	self.wfile.write(b"\r\n")
				457
				458	def log_request(self, code='-', size='-'):
				459	"""Log an accepted request.
				460
				461	This is called by send_response().
				462
				463	"""
				464
				465	self.log_message('"%s" %s %s',
				466	self.requestline, str(code), str(size))
				467
				468	def log_error(self, format, *args):
				469	"""Log an error.
				470
				471	This is called when a request cannot be fulfilled. By
				472	default it passes the message on to log_message().
				473
				474	Arguments are the same as for log_message().
				475
				476	XXX This should go to the separate error log.
				477
				478	"""
				479
				480	self.log_message(format, *args)
				481
				482	def log_message(self, format, *args):
				483	"""Log an arbitrary message.
				484
				485	This is used by all other logging functions. Override
				486	it if you have specific logging wishes.
				487
				488	The first argument, FORMAT, is a format string for the
				489	message to be logged. If the format string contains
				490	any % escapes requiring parameters, they should be
				491	specified as subsequent arguments (it's just like
				492	printf!).
				493
				494	The client host and current date/time are prefixed to
				495	every message.
				496
				497	"""
				498
				499	sys.stderr.write("%s - - [%s] %s\n" %
				500	(self.address_string(),
				501	self.log_date_time_string(),
				502	format%args))
				503
				504	def version_string(self):
				505	"""Return the server software version string."""
				506	return self.server_version + ' ' + self.sys_version
				507
				508	def date_time_string(self, timestamp=None):
				509	"""Return the current date and time formatted for a message header."""
				510	if timestamp is None:
				511	timestamp = time.time()
				512	year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
				513	s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
				514	self.weekdayname[wd],
				515	day, self.monthname[month], year,
				516	hh, mm, ss)
				517	return s
				518
				519	def log_date_time_string(self):
				520	"""Return the current time formatted for logging."""
				521	now = time.time()
				522	year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
				523	s = "%02d/%3s/%04d %02d:%02d:%02d" % (
				524	day, self.monthname[month], year, hh, mm, ss)
				525	return s
				526
				527	weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
				528
				529	monthname = [None,
				530	'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				531	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				532
				533	def address_string(self):
				534	"""Return the client address formatted for logging.
				535
				536	This version looks up the full hostname using gethostbyaddr(),
				537	and tries to find a name that contains at least one dot.
				538
				539	"""
				540
				541	host, port = self.client_address[:2]
				542	return socket.getfqdn(host)
				543
				544	# Essentially static class variables
				545
				546	# The version of the HTTP protocol we support.
				547	# Set this to HTTP/1.1 to enable automatic keepalive
				548	protocol_version = "HTTP/1.0"
				549
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	550	# MessageClass used to parse headers
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	551	MessageClass = http.client.HTTPMessage
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	552
				553	# Table mapping response codes to messages; entries have the
				554	# form {code: (shortmessage, longmessage)}.
				555	# See RFC 2616.
				556	responses = {
				557	100: ('Continue', 'Request received, please continue'),
				558	101: ('Switching Protocols',
				559	'Switching to new protocol; obey Upgrade header'),
				560
				561	200: ('OK', 'Request fulfilled, document follows'),
				562	201: ('Created', 'Document created, URL follows'),
				563	202: ('Accepted',
				564	'Request accepted, processing continues off-line'),
				565	203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
				566	204: ('No Content', 'Request fulfilled, nothing follows'),
				567	205: ('Reset Content', 'Clear input form for further input.'),
				568	206: ('Partial Content', 'Partial content follows.'),
				569
				570	300: ('Multiple Choices',
				571	'Object has several resources -- see URI list'),
				572	301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
				573	302: ('Found', 'Object moved temporarily -- see URI list'),
				574	303: ('See Other', 'Object moved -- see Method and URL list'),
				575	304: ('Not Modified',
				576	'Document has not changed since given time'),
				577	305: ('Use Proxy',
				578	'You must use proxy specified in Location to access this '
				579	'resource.'),
				580	307: ('Temporary Redirect',
				581	'Object moved temporarily -- see URI list'),
				582
				583	400: ('Bad Request',
				584	'Bad request syntax or unsupported method'),
				585	401: ('Unauthorized',
				586	'No permission -- see authorization schemes'),
				587	402: ('Payment Required',
				588	'No payment -- see charging schemes'),
				589	403: ('Forbidden',
				590	'Request forbidden -- authorization will not help'),
				591	404: ('Not Found', 'Nothing matches the given URI'),
				592	405: ('Method Not Allowed',
Senthil Kumaran	7aa2621	2010-02-22 11:00:50 +0000	[diff] [blame]	593	'Specified method is invalid for this resource.'),
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	594	406: ('Not Acceptable', 'URI not available in preferred format.'),
				595	407: ('Proxy Authentication Required', 'You must authenticate with '
				596	'this proxy before proceeding.'),
				597	408: ('Request Timeout', 'Request timed out; try again later.'),
				598	409: ('Conflict', 'Request conflict.'),
				599	410: ('Gone',
				600	'URI no longer exists and has been permanently removed.'),
				601	411: ('Length Required', 'Client must specify Content-Length.'),
				602	412: ('Precondition Failed', 'Precondition in headers is false.'),
				603	413: ('Request Entity Too Large', 'Entity is too large.'),
				604	414: ('Request-URI Too Long', 'URI is too long.'),
				605	415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
				606	416: ('Requested Range Not Satisfiable',
				607	'Cannot satisfy request range.'),
				608	417: ('Expectation Failed',
				609	'Expect condition could not be satisfied.'),
				610
				611	500: ('Internal Server Error', 'Server got itself in trouble'),
				612	501: ('Not Implemented',
				613	'Server does not support this operation'),
				614	502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
				615	503: ('Service Unavailable',
				616	'The server cannot process the request due to a high load'),
				617	504: ('Gateway Timeout',
				618	'The gateway server did not receive a timely response'),
				619	505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
				620	}
				621
				622
				623	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				624
				625	"""Simple HTTP request handler with GET and HEAD commands.
				626
				627	This serves files from the current directory and any of its
				628	subdirectories. The MIME type for files is determined by
				629	calling the .guess_type() method.
				630
				631	The GET and HEAD requests are identical except that the HEAD
				632	request omits the actual contents of the file.
				633
				634	"""
				635
				636	server_version = "SimpleHTTP/" + __version__
				637
				638	def do_GET(self):
				639	"""Serve a GET request."""
				640	f = self.send_head()
				641	if f:
				642	self.copyfile(f, self.wfile)
				643	f.close()
				644
				645	def do_HEAD(self):
				646	"""Serve a HEAD request."""
				647	f = self.send_head()
				648	if f:
				649	f.close()
				650
				651	def send_head(self):
				652	"""Common code for GET and HEAD commands.
				653
				654	This sends the response code and MIME headers.
				655
				656	Return value is either a file object (which has to be copied
				657	to the outputfile by the caller unless the command was HEAD,
				658	and must be closed by the caller under all circumstances), or
				659	None, in which case the caller has nothing further to do.
				660
				661	"""
				662	path = self.translate_path(self.path)
				663	f = None
				664	if os.path.isdir(path):
				665	if not self.path.endswith('/'):
				666	# redirect browser - doing basically what apache does
				667	self.send_response(301)
				668	self.send_header("Location", self.path + "/")
				669	self.end_headers()
				670	return None
				671	for index in "index.html", "index.htm":
				672	index = os.path.join(path, index)
				673	if os.path.exists(index):
				674	path = index
				675	break
				676	else:
				677	return self.list_directory(path)
				678	ctype = self.guess_type(path)
				679	try:
				680	f = open(path, 'rb')
				681	except IOError:
				682	self.send_error(404, "File not found")
				683	return None
				684	self.send_response(200)
				685	self.send_header("Content-type", ctype)
				686	fs = os.fstat(f.fileno())
				687	self.send_header("Content-Length", str(fs[6]))
				688	self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
				689	self.end_headers()
				690	return f
				691
				692	def list_directory(self, path):
				693	"""Helper to produce a directory listing (absent index.html).
				694
				695	Return value is either a file object, or None (indicating an
				696	error). In either case, the headers are sent, making the
				697	interface the same as for send_head().
				698
				699	"""
				700	try:
				701	list = os.listdir(path)
				702	except os.error:
				703	self.send_error(404, "No permission to list directory")
				704	return None
				705	list.sort(key=lambda a: a.lower())
				706	r = []
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	707	displaypath = cgi.escape(urllib.parse.unquote(self.path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	708	r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
				709	r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
				710	r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
				711	r.append("<hr>\n<ul>\n")
				712	for name in list:
				713	fullname = os.path.join(path, name)
				714	displayname = linkname = name
				715	# Append / for directories or @ for symbolic links
				716	if os.path.isdir(fullname):
				717	displayname = name + "/"
				718	linkname = name + "/"
				719	if os.path.islink(fullname):
				720	displayname = name + "@"
				721	# Note: a link to a directory displays with @ and links with /
				722	r.append('<li><a href="%s">%s</a>\n'
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	723	% (urllib.parse.quote(linkname), cgi.escape(displayname)))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	724	r.append("</ul>\n<hr>\n</body>\n</html>\n")
				725	enc = sys.getfilesystemencoding()
				726	encoded = ''.join(r).encode(enc)
				727	f = io.BytesIO()
				728	f.write(encoded)
				729	f.seek(0)
				730	self.send_response(200)
				731	self.send_header("Content-type", "text/html; charset=%s" % enc)
				732	self.send_header("Content-Length", str(len(encoded)))
				733	self.end_headers()
				734	return f
				735
				736	def translate_path(self, path):
				737	"""Translate a /-separated PATH to the local filename syntax.
				738
				739	Components that mean special things to the local file system
				740	(e.g. drive or directory names) are ignored. (XXX They should
				741	probably be diagnosed.)
				742
				743	"""
				744	# abandon query parameters
				745	path = path.split('?',1)[0]
				746	path = path.split('#',1)[0]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	747	path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	748	words = path.split('/')
				749	words = filter(None, words)
				750	path = os.getcwd()
				751	for word in words:
				752	drive, word = os.path.splitdrive(word)
				753	head, word = os.path.split(word)
				754	if word in (os.curdir, os.pardir): continue
				755	path = os.path.join(path, word)
				756	return path
				757
				758	def copyfile(self, source, outputfile):
				759	"""Copy all data between two file objects.
				760
				761	The SOURCE argument is a file object open for reading
				762	(or anything with a read() method) and the DESTINATION
				763	argument is a file object open for writing (or
				764	anything with a write() method).
				765
				766	The only reason for overriding this would be to change
				767	the block size or perhaps to replace newlines by CRLF
				768	-- note however that this the default server uses this
				769	to copy binary data as well.
				770
				771	"""
				772	shutil.copyfileobj(source, outputfile)
				773
    def guess_type(self, path):
        """Return the MIME type to announce for the file named PATH.

        The result is a string of the form type/subtype, suitable for
        a Content-type header.

        The extension of PATH is looked up in self.extensions_map,
        first exactly as written and then lower-cased; if neither is
        present the entry for the empty extension (the default type)
        is returned.  A subclass could instead look inside the data to
        make a better, if slower, guess.

        """
        suffix = posixpath.splitext(path)[1]
        table = self.extensions_map
        # An exact-case match takes precedence over the lower-cased one.
        for candidate in (suffix, suffix.lower()):
            if candidate in table:
                return table[candidate]
        return table['']
				797
    # Build the extension -> MIME type table once, when the class body runs.
    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map[''] = 'application/octet-stream' # Default
    # These source-file extensions are always mapped to plain text.
    extensions_map.update(dict.fromkeys(('.py', '.c', '.h'), 'text/plain'))
				807
				808
				809	# Utilities for CGIHTTPRequestHandler
				810
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	811	# TODO(gregory.p.smith): Move this into an appropriate library.
				812	def _url_collapse_path_split(path):
				813	"""
				814	Given a URL path, remove extra '/'s and '.' path elements and collapse
				815	any '..' references.
				816
				817	Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
				818
				819	Returns: A tuple of (head, tail) where tail is everything after the final /
				820	and head is everything before it. Head will always start with a '/' and,
				821	if it contains anything else, never have a trailing '/'.
				822
				823	Raises: IndexError if too many '..' occur within the path.
				824	"""
				825	# Similar to os.path.split(os.path.normpath(path)) but specific to URL
				826	# path semantics rather than local operating system semantics.
				827	path_parts = []
				828	for part in path.split('/'):
				829	if part == '.':
				830	path_parts.append('')
				831	else:
				832	path_parts.append(part)
				833	# Filter out blank non trailing parts before consuming the '..'.
				834	path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
				835	if path_parts:
				836	tail_part = path_parts.pop()
				837	else:
				838	tail_part = ''
				839	head_parts = []
				840	for part in path_parts:
				841	if part == '..':
				842	head_parts.pop()
				843	else:
				844	head_parts.append(part)
				845	if tail_part and tail_part == '..':
				846	head_parts.pop()
				847	tail_part = ''
				848	return ('/' + '/'.join(head_parts), tail_part)
				849
				850
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is available, and -1 when the
    pwd module cannot be imported.  If there is no 'nobody' account,
    one more than the largest uid in the password database is used.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    return nobody
				867
				868
				869	def executable(path):
				870	"""Test for executable file."""
				871	try:
				872	st = os.stat(path)
				873	except os.error:
				874	return False
				875	return st.st_mode & 0o111 != 0
				876
				877
				878	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				879
				880	"""Complete HTTP server with GET, HEAD and POST commands.
				881
				882	GET and HEAD also support running CGI scripts.
				883
				884	The POST command is only implemented for CGI scripts.
				885
				886	"""
				887
				888	# Determine platform specifics
				889	have_fork = hasattr(os, 'fork')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	890
				891	# Make rfile unbuffered -- we need to read one line and then pass
				892	# the rest to a subprocess, so we can't use buffered input.
				893	rbufsize = 0
				894
				895	def do_POST(self):
				896	"""Serve a POST request.
				897
				898	This is only implemented for CGI scripts.
				899
				900	"""
				901
				902	if self.is_cgi():
				903	self.run_cgi()
				904	else:
				905	self.send_error(501, "Can only POST to CGI scripts")
				906
				907	def send_head(self):
				908	"""Version of send_head that support CGI scripts"""
				909	if self.is_cgi():
				910	return self.run_cgi()
				911	else:
				912	return SimpleHTTPRequestHandler.send_head(self)
				913
				914	def is_cgi(self):
				915	"""Test whether self.path corresponds to a CGI script.
				916
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	917	Returns True and updates the cgi_info attribute to the tuple
				918	(dir, rest) if self.path requires running a CGI script.
				919	Returns False otherwise.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	920
Benjamin Peterson	a7deeee	2009-05-08 20:54:42 +0000	[diff] [blame]	921	If any exception is raised, the caller should assume that
				922	self.path was rejected as invalid and act accordingly.
				923
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	924	The default implementation tests whether the normalized url
				925	path begins with one of the strings in self.cgi_directories
				926	(and the next character is a '/' or the end of the string).
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	927
				928	"""
				929
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	930	splitpath = _url_collapse_path_split(self.path)
				931	if splitpath[0] in self.cgi_directories:
				932	self.cgi_info = splitpath
				933	return True
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	934	return False
				935
				936	cgi_directories = ['/cgi-bin', '/htbin']
				937
				938	def is_executable(self, path):
				939	"""Test whether argument path is an executable file."""
				940	return executable(path)
				941
				942	def is_python(self, path):
				943	"""Test whether argument path is a Python script."""
				944	head, tail = os.path.splitext(path)
				945	return tail.lower() in (".py", ".pyw")
				946
				947	def run_cgi(self):
				948	"""Execute a CGI script."""
				949	path = self.path
				950	dir, rest = self.cgi_info
				951
				952	i = path.find('/', len(dir) + 1)
				953	while i >= 0:
				954	nextdir = path[:i]
				955	nextrest = path[i+1:]
				956
				957	scriptdir = self.translate_path(nextdir)
				958	if os.path.isdir(scriptdir):
				959	dir, rest = nextdir, nextrest
				960	i = path.find('/', len(dir) + 1)
				961	else:
				962	break
				963
				964	# find an explicit query string, if present.
				965	i = rest.rfind('?')
				966	if i >= 0:
				967	rest, query = rest[:i], rest[i+1:]
				968	else:
				969	query = ''
				970
				971	# dissect the part after the directory name into a script name &
				972	# a possible additional path, to be stored in PATH_INFO.
				973	i = rest.find('/')
				974	if i >= 0:
				975	script, rest = rest[:i], rest[i:]
				976	else:
				977	script, rest = rest, ''
				978
				979	scriptname = dir + '/' + script
				980	scriptfile = self.translate_path(scriptname)
				981	if not os.path.exists(scriptfile):
				982	self.send_error(404, "No such CGI script (%r)" % scriptname)
				983	return
				984	if not os.path.isfile(scriptfile):
				985	self.send_error(403, "CGI script is not a plain file (%r)" %
				986	scriptname)
				987	return
				988	ispy = self.is_python(scriptname)
				989	if not ispy:
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	990	if not self.is_executable(scriptfile):
				991	self.send_error(403, "CGI script is not executable (%r)" %
				992	scriptname)
				993	return
				994
				995	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				996	# XXX Much of the following could be prepared ahead of time!
				997	env = {}
				998	env['SERVER_SOFTWARE'] = self.version_string()
				999	env['SERVER_NAME'] = self.server.server_name
				1000	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				1001	env['SERVER_PROTOCOL'] = self.protocol_version
				1002	env['SERVER_PORT'] = str(self.server.server_port)
				1003	env['REQUEST_METHOD'] = self.command
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1004	uqrest = urllib.parse.unquote(rest)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1005	env['PATH_INFO'] = uqrest
				1006	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				1007	env['SCRIPT_NAME'] = scriptname
				1008	if query:
				1009	env['QUERY_STRING'] = query
				1010	host = self.address_string()
				1011	if host != self.client_address[0]:
				1012	env['REMOTE_HOST'] = host
				1013	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1014	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1015	if authorization:
				1016	authorization = authorization.split()
				1017	if len(authorization) == 2:
				1018	import base64, binascii
				1019	env['AUTH_TYPE'] = authorization[0]
				1020	if authorization[0].lower() == "basic":
				1021	try:
				1022	authorization = authorization[1].encode('ascii')
Georg Brandl	706824f	2009-06-04 09:42:55 +0000	[diff] [blame]	1023	authorization = base64.decodebytes(authorization).\
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1024	decode('ascii')
				1025	except (binascii.Error, UnicodeError):
				1026	pass
				1027	else:
				1028	authorization = authorization.split(':')
				1029	if len(authorization) == 2:
				1030	env['REMOTE_USER'] = authorization[0]
				1031	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1032	if self.headers.get('content-type') is None:
				1033	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1034	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1035	env['CONTENT_TYPE'] = self.headers['content-type']
				1036	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1037	if length:
				1038	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1039	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1040	if referer:
				1041	env['HTTP_REFERER'] = referer
				1042	accept = []
				1043	for line in self.headers.getallmatchingheaders('accept'):
				1044	if line[:1] in "\t\n\r ":
				1045	accept.append(line.strip())
				1046	else:
				1047	accept = accept + line[7:].split(',')
				1048	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1049	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1050	if ua:
				1051	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1052	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	62e2ca2	2010-07-31 21:54:24 +0000	[diff] [blame]	1053	cookie_str = ', '.join(co)
				1054	if cookie_str:
				1055	env['HTTP_COOKIE'] = cookie_str
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1056	# XXX Other HTTP_* headers
				1057	# Since we're setting the env in the parent, provide empty
				1058	# values to override previously set values
				1059	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				1060	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				1061	env.setdefault(k, "")
				1062	os.environ.update(env)
				1063
				1064	self.send_response(200, "Script output follows")
				1065
				1066	decoded_query = query.replace('+', ' ')
				1067
				1068	if self.have_fork:
				1069	# Unix -- fork as we should
				1070	args = [script]
				1071	if '=' not in decoded_query:
				1072	args.append(decoded_query)
				1073	nobody = nobody_uid()
				1074	self.wfile.flush() # Always flush before forking
				1075	pid = os.fork()
				1076	if pid != 0:
				1077	# Parent
				1078	pid, sts = os.waitpid(pid, 0)
				1079	# throw away additional data [see bug #427345]
				1080	while select.select([self.rfile], [], [], 0)[0]:
				1081	if not self.rfile.read(1):
				1082	break
				1083	if sts:
				1084	self.log_error("CGI script exit status %#x", sts)
				1085	return
				1086	# Child
				1087	try:
				1088	try:
				1089	os.setuid(nobody)
				1090	except os.error:
				1091	pass
				1092	os.dup2(self.rfile.fileno(), 0)
				1093	os.dup2(self.wfile.fileno(), 1)
				1094	os.execve(scriptfile, args, os.environ)
				1095	except:
				1096	self.server.handle_error(self.request, self.client_address)
				1097	os._exit(127)
				1098
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1099	else:
				1100	# Non-Unix -- use subprocess
				1101	import subprocess
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1102	cmdline = [scriptfile]
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1103	if self.is_python(scriptfile):
				1104	interp = sys.executable
				1105	if interp.lower().endswith("w.exe"):
				1106	# On Windows, use python.exe, not pythonw.exe
				1107	interp = interp[:-5] + interp[-4:]
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1108	cmdline = [interp, '-u'] + cmdline
				1109	if '=' not in query:
				1110	cmdline.append(query)
				1111	self.log_message("command: %s", subprocess.list2cmdline(cmdline))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1112	try:
				1113	nbytes = int(length)
				1114	except (TypeError, ValueError):
				1115	nbytes = 0
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1116	p = subprocess.Popen(cmdline,
				1117	stdin=subprocess.PIPE,
				1118	stdout=subprocess.PIPE,
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1119	stderr=subprocess.PIPE
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1120	)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1121	if self.command.lower() == "post" and nbytes > 0:
				1122	data = self.rfile.read(nbytes)
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1123	else:
				1124	data = None
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1125	# throw away additional data [see bug #427345]
				1126	while select.select([self.rfile._sock], [], [], 0)[0]:
				1127	if not self.rfile._sock.recv(1):
				1128	break
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1129	stdout, stderr = p.communicate(data)
				1130	self.wfile.write(stdout)
				1131	if stderr:
				1132	self.log_error('%s', stderr)
				1133	status = p.returncode
				1134	if status:
				1135	self.log_error("CGI script exit status %#x", status)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1136	else:
				1137	self.log_message("CGI script exited OK")
				1138
				1139
				1140	def test(HandlerClass = BaseHTTPRequestHandler,
				1141	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1142	"""Test the HTTP request handler class.
				1143
				1144	This runs an HTTP server on port 8000 (or the first command line
				1145	argument).
				1146
				1147	"""
				1148
				1149	if sys.argv[1:]:
				1150	port = int(sys.argv[1])
				1151	else:
				1152	port = 8000
				1153	server_address = ('', port)
				1154
				1155	HandlerClass.protocol_version = protocol
				1156	httpd = ServerClass(server_address, HandlerClass)
				1157
				1158	sa = httpd.socket.getsockname()
				1159	print("Serving HTTP on", sa[0], "port", sa[1], "...")
Alexandre Vassalotti	b5292a2	2009-04-03 07:16:55 +0000	[diff] [blame]	1160	try:
				1161	httpd.serve_forever()
				1162	except KeyboardInterrupt:
				1163	print("\nKeyboard interrupt received, exiting.")
				1164	httpd.server_close()
				1165	sys.exit(0)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1166
# When executed as a script, start the test server using
# SimpleHTTPRequestHandler as the request handler.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)