Blame - Lib/http/server.py - platform/external/python/cpython3

blob: 5726017b38c93a858679a62dbc00e87c1d037daa [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
				3	Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
				4	SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
				5	and CGIHTTPRequestHandler for CGI scripts.
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	16	subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	17
				18	In all cases, the implementation is intentionally naive -- all
				19	requests are executed synchronously.
				20
				21	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				22	-- it may execute arbitrary Python code or external programs.
				23
				24	Note that status code 200 is sent prior to execution of a CGI script, so
				25	scripts cannot send other status codes such as 302 (redirect).
				26
				27	XXX To do:
				28
				29	- log requests even later (to capture byte count)
				30	- log user-agent header and other interesting goodies
				31	- send error log to separate file
				32	"""
				33
				34
				35	# See also:
				36	#
				37	# HTTP Working Group T. Berners-Lee
				38	# INTERNET-DRAFT R. T. Fielding
				39	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				40	# Expires September 8, 1995 March 8, 1995
				41	#
				42	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				43	#
				44	# and
				45	#
				46	# Network Working Group R. Fielding
				47	# Request for Comments: 2616 et al
				48	# Obsoletes: 2068 June 1999
				49	# Category: Standards Track
				50	#
				51	# URL: http://www.faqs.org/rfcs/rfc2616.html
				52
				53	# Log files
				54	# ---------
				55	#
				56	# Here's a quote from the NCSA httpd docs about log file format.
				57	#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
				79	#
				80	# (Actually, the latter is only true if you know the server configuration
				81	# at the time the request was made!)
				82
				83	__version__ = "0.6"
				84
				85	__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				86
				87	import io
				88	import os
				89	import sys
				90	import cgi
				91	import time
				92	import socket # For gethostbyaddr()
				93	import shutil
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	94	import urllib.parse
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	95	import select
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	96	import mimetypes
				97	import posixpath
				98	import socketserver
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	99	import email.message
				100	import email.parser
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	101
				102	# Default error message template
				103	DEFAULT_ERROR_MESSAGE = """\
				104	<head>
				105	<title>Error response</title>
				106	</head>
				107	<body>
				108	<h1>Error response</h1>
				109	<p>Error code %(code)d.
				110	<p>Message: %(message)s.
				111	<p>Error code explanation: %(code)s = %(explain)s.
				112	</body>
				113	"""
				114
				115	DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				116
				117	def _quote_html(html):
				118	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				119
				120	class HTTPServer(socketserver.TCPServer):
				121
				122	allow_reuse_address = 1 # Seems to make sense in testing environment
				123
				124	def server_bind(self):
				125	"""Override server_bind to store the server name."""
				126	socketserver.TCPServer.server_bind(self)
				127	host, port = self.socket.getsockname()[:2]
				128	self.server_name = socket.getfqdn(host)
				129	self.server_port = port
				130
				131
				132	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				133
				134	"""HTTP request handler base class.
				135
				136	The following explanation of HTTP serves to guide you through the
				137	code as well as to expose any misunderstandings I may have about
				138	HTTP (so you don't need to read the code to figure out I'm wrong
				139	:-).
				140
				141	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				142	top of a reliable stream transport (e.g. TCP/IP). The protocol
				143	recognizes three parts to a request:
				144
				145	1. One line identifying the request type and path
				146	2. An optional set of RFC-822-style headers
				147	3. An optional data part
				148
				149	The headers and data are separated by a blank line.
				150
				151	The first line of the request has the form
				152
				153	<command> <path> <version>
				154
				155	where <command> is a (case-sensitive) keyword such as GET or POST,
				156	<path> is a string containing path information for the request,
				157	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				158	<path> is encoded using the URL encoding scheme (using %xx to signify
				159	the ASCII character with hex code xx).
				160
				161	The specification specifies that lines are separated by CRLF but
				162	for compatibility with the widest range of clients recommends
				163	servers also handle LF. Similarly, whitespace in the request line
				164	is treated sensibly (allowing multiple spaces between components
				165	and allowing trailing whitespace).
				166
				167	Similarly, for output, lines ought to be separated by CRLF pairs
				168	but most clients grok LF characters just fine.
				169
				170	If the first line of the request has the form
				171
				172	<command> <path>
				173
				174	(i.e. <version> is left out) then this is assumed to be an HTTP
				175	0.9 request; this form has no optional headers and data part and
				176	the reply consists of just the data.
				177
				178	The reply form of the HTTP 1.x protocol again has three parts:
				179
				180	1. One line giving the response code
				181	2. An optional set of RFC-822-style headers
				182	3. The data
				183
				184	Again, the headers and data are separated by a blank line.
				185
				186	The response code line has the form
				187
				188	<version> <responsecode> <responsestring>
				189
				190	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				191	<responsecode> is a 3-digit response code indicating success or
				192	failure of the request, and <responsestring> is an optional
				193	human-readable string explaining what the response code means.
				194
				195	This server parses the request and the headers, and then calls a
				196	function specific to the request type (<command>). Specifically,
				197	a request SPAM will be handled by a method do_SPAM(). If no
				198	such method exists the server sends an error response to the
				199	client. If it exists, it is called with no arguments:
				200
				201	do_SPAM()
				202
				203	Note that the request name is case sensitive (i.e. SPAM and spam
				204	are different requests).
				205
				206	The various request details are stored in instance variables:
				207
				208	- client_address is the client IP address in the form (host,
				209	port);
				210
				211	- command, path and version are the broken-down request line;
				212
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	213	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	214	class) containing the header information;
				215
				216	- rfile is a file object open for reading positioned at the
				217	start of the optional input data part;
				218
				219	- wfile is a file object open for writing.
				220
				221	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				222
				223	The first thing to be written must be the response line. Then
				224	follow 0 or more header lines, then a blank line, and then the
				225	actual data (if any). The meaning of the header lines depends on
				226	the command executed by the server; in most cases, when data is
				227	returned, there should be at least one header line of the form
				228
				229	Content-type: <type>/<subtype>
				230
				231	where <type> and <subtype> should be registered MIME types,
				232	e.g. "text/html" or "text/plain".
				233
				234	"""
				235
				236	# The Python system version, truncated to its first component.
				237	sys_version = "Python/" + sys.version.split()[0]
				238
				239	# The server software version. You may want to override this.
				240	# The format is multiple whitespace-separated strings,
				241	# where each string is of the form name[/version].
				242	server_version = "BaseHTTP/" + __version__
				243
				244	error_message_format = DEFAULT_ERROR_MESSAGE
				245	error_content_type = DEFAULT_ERROR_CONTENT_TYPE
				246
				247	# The default request version. This only affects responses up until
				248	# the point where the request line is parsed, so it mainly decides what
				249	# the client gets back when sending a malformed request line.
				250	# Most web servers default to HTTP 0.9, i.e. don't send a status line.
				251	default_request_version = "HTTP/0.9"
				252
    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline (the request line without its line
        terminator) and self.close_connection are set as well.

        Return True for success, False for failure; on failure, an
        error is sent back -- except for a request line that contains no
        words at all, which returns False without sending anything.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        # Assume the connection is closed after this request unless the
        # version/headers below say otherwise.
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip a single trailing CRLF or LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            # "<command> <path> <version>"
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Keep the connection open only when both the client's version
            # and this handler's protocol_version are at least HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # "<command> <path>": version is left at the default; only GET
            # is accepted in this form.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank request line: fail without sending a response.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.

        # MessageClass wants to see strings rather than bytes.
        # But a TextIOWrapper around self.rfile would buffer too many bytes
        # from the stream, bytes which we later need to read as bytes.
        # So we read the correct bytes here, as bytes, then use StringIO
        # to make them look like strings for MessageClass to parse.
        headers = []
        while True:
            line = self.rfile.readline()
            headers.append(line)
            # Stop at the blank line that ends the headers, or at EOF.
            if line in (b'\r\n', b'\n', b''):
                break
        hfile = io.StringIO(b''.join(headers).decode('iso-8859-1'))
        self.headers = email.parser.Parser(_class=self.MessageClass).parse(hfile)

        # An explicit Connection header overrides the version-based default;
        # keep-alive is honoured only if protocol_version is >= "HTTP/1.1".
        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True
				337
				338	def handle_one_request(self):
				339	"""Handle a single HTTP request.
				340
				341	You normally don't need to override this method; see the class
				342	__doc__ string for information on how to handle specific HTTP
				343	commands such as GET and POST.
				344
				345	"""
				346	self.raw_requestline = self.rfile.readline()
				347	if not self.raw_requestline:
				348	self.close_connection = 1
				349	return
				350	if not self.parse_request(): # An error code has been sent, just exit
				351	return
				352	mname = 'do_' + self.command
				353	if not hasattr(self, mname):
				354	self.send_error(501, "Unsupported method (%r)" % self.command)
				355	return
				356	method = getattr(self, mname)
				357	method()
				358
				359	def handle(self):
				360	"""Handle multiple requests if necessary."""
				361	self.close_connection = 1
				362
				363	self.handle_one_request()
				364	while not self.close_connection:
				365	self.handle_one_request()
				366
    def send_error(self, code, message=None):
        """Log an error and send an error response with an HTML body.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry for the code in
        self.responses ('???' when the code is not in the table).

        The complete response is produced here (status line, headers and
        body), so this must be called before any other output has been
        generated.  No body is written for HEAD requests, for codes below
        200, or for codes 204 and 304.

        """
        try:
            short_reason, explain = self.responses[code]
        except KeyError:
            short_reason = explain = '???'
        if message is None:
            message = short_reason
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        fields = {
            'code': code,
            'message': _quote_html(message),
            'explain': explain,
        }
        content = self.error_message_format % fields
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command == 'HEAD':
            return
        if code < 200 or code in (204, 304):
            return
        self.wfile.write(content.encode('UTF-8', 'replace'))
				397
				398	def send_response(self, code, message=None):
				399	"""Send the response header and log the response code.
				400
				401	Also send two standard headers with the server software
				402	version and the current date.
				403
				404	"""
				405	self.log_request(code)
				406	if message is None:
				407	if code in self.responses:
				408	message = self.responses[code][0]
				409	else:
				410	message = ''
				411	if self.request_version != 'HTTP/0.9':
				412	self.wfile.write(("%s %d %s\r\n" %
				413	(self.protocol_version, code, message)).encode('ASCII', 'strict'))
				414	# print (self.protocol_version, code, message)
				415	self.send_header('Server', self.version_string())
				416	self.send_header('Date', self.date_time_string())
				417
				418	def send_header(self, keyword, value):
				419	"""Send a MIME header."""
				420	if self.request_version != 'HTTP/0.9':
				421	self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))
				422
				423	if keyword.lower() == 'connection':
				424	if value.lower() == 'close':
				425	self.close_connection = 1
				426	elif value.lower() == 'keep-alive':
				427	self.close_connection = 0
				428
				429	def end_headers(self):
				430	"""Send the blank line ending the MIME headers."""
				431	if self.request_version != 'HTTP/0.9':
				432	self.wfile.write(b"\r\n")
				433
				434	def log_request(self, code='-', size='-'):
				435	"""Log an accepted request.
				436
				437	This is called by send_response().
				438
				439	"""
				440
				441	self.log_message('"%s" %s %s',
				442	self.requestline, str(code), str(size))
				443
    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message() unchanged.

        Arguments are the same as for log_message(): a %-style format
        string followed by the values it refers to.

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)
				457
				458	def log_message(self, format, *args):
				459	"""Log an arbitrary message.
				460
				461	This is used by all other logging functions. Override
				462	it if you have specific logging wishes.
				463
				464	The first argument, FORMAT, is a format string for the
				465	message to be logged. If the format string contains
				466	any % escapes requiring parameters, they should be
				467	specified as subsequent arguments (it's just like
				468	printf!).
				469
				470	The client host and current date/time are prefixed to
				471	every message.
				472
				473	"""
				474
				475	sys.stderr.write("%s - - [%s] %s\n" %
				476	(self.address_string(),
				477	self.log_date_time_string(),
				478	format%args))
				479
				480	def version_string(self):
				481	"""Return the server software version string."""
				482	return self.server_version + ' ' + self.sys_version
				483
				484	def date_time_string(self, timestamp=None):
				485	"""Return the current date and time formatted for a message header."""
				486	if timestamp is None:
				487	timestamp = time.time()
				488	year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
				489	s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
				490	self.weekdayname[wd],
				491	day, self.monthname[month], year,
				492	hh, mm, ss)
				493	return s
				494
				495	def log_date_time_string(self):
				496	"""Return the current time formatted for logging."""
				497	now = time.time()
				498	year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
				499	s = "%02d/%3s/%04d %02d:%02d:%02d" % (
				500	day, self.monthname[month], year, hh, mm, ss)
				501	return s
				502
				503	weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
				504
				505	monthname = [None,
				506	'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				507	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				508
				509	def address_string(self):
				510	"""Return the client address formatted for logging.
				511
				512	This version looks up the full hostname using gethostbyaddr(),
				513	and tries to find a name that contains at least one dot.
				514
				515	"""
				516
				517	host, port = self.client_address[:2]
				518	return socket.getfqdn(host)
				519
				520	# Essentially static class variables
				521
				522	# The version of the HTTP protocol we support.
				523	# Set this to HTTP/1.1 to enable automatic keepalive
				524	protocol_version = "HTTP/1.0"
				525
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	526	# MessageClass used to parse headers
				527	import http.client
				528	MessageClass = http.client.HTTPMessage
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	529
				530	# Table mapping response codes to messages; entries have the
				531	# form {code: (shortmessage, longmessage)}.
				532	# See RFC 2616.
				533	responses = {
				534	100: ('Continue', 'Request received, please continue'),
				535	101: ('Switching Protocols',
				536	'Switching to new protocol; obey Upgrade header'),
				537
				538	200: ('OK', 'Request fulfilled, document follows'),
				539	201: ('Created', 'Document created, URL follows'),
				540	202: ('Accepted',
				541	'Request accepted, processing continues off-line'),
				542	203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
				543	204: ('No Content', 'Request fulfilled, nothing follows'),
				544	205: ('Reset Content', 'Clear input form for further input.'),
				545	206: ('Partial Content', 'Partial content follows.'),
				546
				547	300: ('Multiple Choices',
				548	'Object has several resources -- see URI list'),
				549	301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
				550	302: ('Found', 'Object moved temporarily -- see URI list'),
				551	303: ('See Other', 'Object moved -- see Method and URL list'),
				552	304: ('Not Modified',
				553	'Document has not changed since given time'),
				554	305: ('Use Proxy',
				555	'You must use proxy specified in Location to access this '
				556	'resource.'),
				557	307: ('Temporary Redirect',
				558	'Object moved temporarily -- see URI list'),
				559
				560	400: ('Bad Request',
				561	'Bad request syntax or unsupported method'),
				562	401: ('Unauthorized',
				563	'No permission -- see authorization schemes'),
				564	402: ('Payment Required',
				565	'No payment -- see charging schemes'),
				566	403: ('Forbidden',
				567	'Request forbidden -- authorization will not help'),
				568	404: ('Not Found', 'Nothing matches the given URI'),
				569	405: ('Method Not Allowed',
				570	'Specified method is invalid for this server.'),
				571	406: ('Not Acceptable', 'URI not available in preferred format.'),
				572	407: ('Proxy Authentication Required', 'You must authenticate with '
				573	'this proxy before proceeding.'),
				574	408: ('Request Timeout', 'Request timed out; try again later.'),
				575	409: ('Conflict', 'Request conflict.'),
				576	410: ('Gone',
				577	'URI no longer exists and has been permanently removed.'),
				578	411: ('Length Required', 'Client must specify Content-Length.'),
				579	412: ('Precondition Failed', 'Precondition in headers is false.'),
				580	413: ('Request Entity Too Large', 'Entity is too large.'),
				581	414: ('Request-URI Too Long', 'URI is too long.'),
				582	415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
				583	416: ('Requested Range Not Satisfiable',
				584	'Cannot satisfy request range.'),
				585	417: ('Expectation Failed',
				586	'Expect condition could not be satisfied.'),
				587
				588	500: ('Internal Server Error', 'Server got itself in trouble'),
				589	501: ('Not Implemented',
				590	'Server does not support this operation'),
				591	502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
				592	503: ('Service Unavailable',
				593	'The server cannot process the request due to a high load'),
				594	504: ('Gateway Timeout',
				595	'The gateway server did not receive a timely response'),
				596	505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
				597	}
				598
				599
				600	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				601
				602	"""Simple HTTP request handler with GET and HEAD commands.
				603
				604	This serves files from the current directory and any of its
				605	subdirectories. The MIME type for files is determined by
				606	calling the .guess_type() method.
				607
				608	The GET and HEAD requests are identical except that the HEAD
				609	request omits the actual contents of the file.
				610
				611	"""
				612
				613	server_version = "SimpleHTTP/" + __version__
				614
				615	def do_GET(self):
				616	"""Serve a GET request."""
				617	f = self.send_head()
				618	if f:
				619	self.copyfile(f, self.wfile)
				620	f.close()
				621
				622	def do_HEAD(self):
				623	"""Serve a HEAD request."""
				624	f = self.send_head()
				625	if f:
				626	f.close()
				627
				628	def send_head(self):
				629	"""Common code for GET and HEAD commands.
				630
				631	This sends the response code and MIME headers.
				632
				633	Return value is either a file object (which has to be copied
				634	to the outputfile by the caller unless the command was HEAD,
				635	and must be closed by the caller under all circumstances), or
				636	None, in which case the caller has nothing further to do.
				637
				638	"""
				639	path = self.translate_path(self.path)
				640	f = None
				641	if os.path.isdir(path):
				642	if not self.path.endswith('/'):
				643	# redirect browser - doing basically what apache does
				644	self.send_response(301)
				645	self.send_header("Location", self.path + "/")
				646	self.end_headers()
				647	return None
				648	for index in "index.html", "index.htm":
				649	index = os.path.join(path, index)
				650	if os.path.exists(index):
				651	path = index
				652	break
				653	else:
				654	return self.list_directory(path)
				655	ctype = self.guess_type(path)
				656	try:
				657	f = open(path, 'rb')
				658	except IOError:
				659	self.send_error(404, "File not found")
				660	return None
				661	self.send_response(200)
				662	self.send_header("Content-type", ctype)
				663	fs = os.fstat(f.fileno())
				664	self.send_header("Content-Length", str(fs[6]))
				665	self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
				666	self.end_headers()
				667	return f
				668
				669	def list_directory(self, path):
				670	"""Helper to produce a directory listing (absent index.html).
				671
				672	Return value is either a file object, or None (indicating an
				673	error). In either case, the headers are sent, making the
				674	interface the same as for send_head().
				675
				676	"""
				677	try:
				678	list = os.listdir(path)
				679	except os.error:
				680	self.send_error(404, "No permission to list directory")
				681	return None
				682	list.sort(key=lambda a: a.lower())
				683	r = []
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	684	displaypath = cgi.escape(urllib.parse.unquote(self.path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	685	r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
				686	r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
				687	r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
				688	r.append("<hr>\n<ul>\n")
				689	for name in list:
				690	fullname = os.path.join(path, name)
				691	displayname = linkname = name
				692	# Append / for directories or @ for symbolic links
				693	if os.path.isdir(fullname):
				694	displayname = name + "/"
				695	linkname = name + "/"
				696	if os.path.islink(fullname):
				697	displayname = name + "@"
				698	# Note: a link to a directory displays with @ and links with /
				699	r.append('<li><a href="%s">%s</a>\n'
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	700	% (urllib.parse.quote(linkname), cgi.escape(displayname)))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	701	r.append("</ul>\n<hr>\n</body>\n</html>\n")
				702	enc = sys.getfilesystemencoding()
				703	encoded = ''.join(r).encode(enc)
				704	f = io.BytesIO()
				705	f.write(encoded)
				706	f.seek(0)
				707	self.send_response(200)
				708	self.send_header("Content-type", "text/html; charset=%s" % enc)
				709	self.send_header("Content-Length", str(len(encoded)))
				710	self.end_headers()
				711	return f
				712
				713	def translate_path(self, path):
				714	"""Translate a /-separated PATH to the local filename syntax.
				715
				716	Components that mean special things to the local file system
				717	(e.g. drive or directory names) are ignored. (XXX They should
				718	probably be diagnosed.)
				719
				720	"""
				721	# abandon query parameters
				722	path = path.split('?',1)[0]
				723	path = path.split('#',1)[0]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	724	path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	725	words = path.split('/')
				726	words = filter(None, words)
				727	path = os.getcwd()
				728	for word in words:
				729	drive, word = os.path.splitdrive(word)
				730	head, word = os.path.split(word)
				731	if word in (os.curdir, os.pardir): continue
				732	path = os.path.join(path, word)
				733	return path
				734
    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The copy itself is delegated to shutil.copyfileobj().

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)
				750
				751	def guess_type(self, path):
				752	"""Guess the type of a file.
				753
				754	Argument is a PATH (a filename).
				755
				756	Return value is a string of the form type/subtype,
				757	usable for a MIME Content-type header.
				758
				759	The default implementation looks the file's extension
				760	up in the table self.extensions_map, using application/octet-stream
				761	as a default; however it would be permissible (if
				762	slow) to look inside the data to make a better guess.
				763
				764	"""
				765
				766	base, ext = posixpath.splitext(path)
				767	if ext in self.extensions_map:
				768	return self.extensions_map[ext]
				769	ext = ext.lower()
				770	if ext in self.extensions_map:
				771	return self.extensions_map[ext]
				772	else:
				773	return self.extensions_map['']
				774
				775	if not mimetypes.inited:
				776	mimetypes.init() # try to read system mime.types
				777	extensions_map = mimetypes.types_map.copy()
				778	extensions_map.update({
				779	'': 'application/octet-stream', # Default
				780	'.py': 'text/plain',
				781	'.c': 'text/plain',
				782	'.h': 'text/plain',
				783	})
				784
				785
				786	# Utilities for CGIHTTPRequestHandler
				787
				788	nobody = None
				789
				790	def nobody_uid():
				791	"""Internal routine to get nobody's uid"""
				792	global nobody
				793	if nobody:
				794	return nobody
				795	try:
				796	import pwd
				797	except ImportError:
				798	return -1
				799	try:
				800	nobody = pwd.getpwnam('nobody')[2]
				801	except KeyError:
				802	nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
				803	return nobody
				804
				805
				806	def executable(path):
				807	"""Test for executable file."""
				808	try:
				809	st = os.stat(path)
				810	except os.error:
				811	return False
				812	return st.st_mode & 0o111 != 0
				813
				814
				815	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				816
				817	"""Complete HTTP server with GET, HEAD and POST commands.
				818
				819	GET and HEAD also support running CGI scripts.
				820
				821	The POST command is only implemented for CGI scripts.
				822
				823	"""
				824
				825	# Determine platform specifics
				826	have_fork = hasattr(os, 'fork')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	827
				828	# Make rfile unbuffered -- we need to read one line and then pass
				829	# the rest to a subprocess, so we can't use buffered input.
				830	rbufsize = 0
				831
				832	def do_POST(self):
				833	"""Serve a POST request.
				834
				835	This is only implemented for CGI scripts.
				836
				837	"""
				838
				839	if self.is_cgi():
				840	self.run_cgi()
				841	else:
				842	self.send_error(501, "Can only POST to CGI scripts")
				843
				844	def send_head(self):
				845	"""Version of send_head that support CGI scripts"""
				846	if self.is_cgi():
				847	return self.run_cgi()
				848	else:
				849	return SimpleHTTPRequestHandler.send_head(self)
				850
				851	def is_cgi(self):
				852	"""Test whether self.path corresponds to a CGI script.
				853
				854	Return a tuple (dir, rest) if self.path requires running a
				855	CGI script, None if not. Note that rest begins with a
				856	slash if it is not empty.
				857
				858	The default implementation tests whether the path
				859	begins with one of the strings in the list
				860	self.cgi_directories (and the next character is a '/'
				861	or the end of the string).
				862
				863	"""
				864
				865	path = self.path
				866
				867	for x in self.cgi_directories:
				868	i = len(x)
				869	if path[:i] == x and (not path[i:] or path[i] == '/'):
				870	self.cgi_info = path[:i], path[i+1:]
				871	return True
				872	return False
				873
				874	cgi_directories = ['/cgi-bin', '/htbin']
				875
				876	def is_executable(self, path):
				877	"""Test whether argument path is an executable file."""
				878	return executable(path)
				879
				880	def is_python(self, path):
				881	"""Test whether argument path is a Python script."""
				882	head, tail = os.path.splitext(path)
				883	return tail.lower() in (".py", ".pyw")
				884
				885	def run_cgi(self):
				886	"""Execute a CGI script."""
				887	path = self.path
				888	dir, rest = self.cgi_info
				889
				890	i = path.find('/', len(dir) + 1)
				891	while i >= 0:
				892	nextdir = path[:i]
				893	nextrest = path[i+1:]
				894
				895	scriptdir = self.translate_path(nextdir)
				896	if os.path.isdir(scriptdir):
				897	dir, rest = nextdir, nextrest
				898	i = path.find('/', len(dir) + 1)
				899	else:
				900	break
				901
				902	# find an explicit query string, if present.
				903	i = rest.rfind('?')
				904	if i >= 0:
				905	rest, query = rest[:i], rest[i+1:]
				906	else:
				907	query = ''
				908
				909	# dissect the part after the directory name into a script name &
				910	# a possible additional path, to be stored in PATH_INFO.
				911	i = rest.find('/')
				912	if i >= 0:
				913	script, rest = rest[:i], rest[i:]
				914	else:
				915	script, rest = rest, ''
				916
				917	scriptname = dir + '/' + script
				918	scriptfile = self.translate_path(scriptname)
				919	if not os.path.exists(scriptfile):
				920	self.send_error(404, "No such CGI script (%r)" % scriptname)
				921	return
				922	if not os.path.isfile(scriptfile):
				923	self.send_error(403, "CGI script is not a plain file (%r)" %
				924	scriptname)
				925	return
				926	ispy = self.is_python(scriptname)
				927	if not ispy:
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	928	if not self.is_executable(scriptfile):
				929	self.send_error(403, "CGI script is not executable (%r)" %
				930	scriptname)
				931	return
				932
				933	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				934	# XXX Much of the following could be prepared ahead of time!
				935	env = {}
				936	env['SERVER_SOFTWARE'] = self.version_string()
				937	env['SERVER_NAME'] = self.server.server_name
				938	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				939	env['SERVER_PROTOCOL'] = self.protocol_version
				940	env['SERVER_PORT'] = str(self.server.server_port)
				941	env['REQUEST_METHOD'] = self.command
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	942	uqrest = urllib.parse.unquote(rest)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	943	env['PATH_INFO'] = uqrest
				944	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				945	env['SCRIPT_NAME'] = scriptname
				946	if query:
				947	env['QUERY_STRING'] = query
				948	host = self.address_string()
				949	if host != self.client_address[0]:
				950	env['REMOTE_HOST'] = host
				951	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	952	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	953	if authorization:
				954	authorization = authorization.split()
				955	if len(authorization) == 2:
				956	import base64, binascii
				957	env['AUTH_TYPE'] = authorization[0]
				958	if authorization[0].lower() == "basic":
				959	try:
				960	authorization = authorization[1].encode('ascii')
				961	authorization = base64.decodestring(authorization).\
				962	decode('ascii')
				963	except (binascii.Error, UnicodeError):
				964	pass
				965	else:
				966	authorization = authorization.split(':')
				967	if len(authorization) == 2:
				968	env['REMOTE_USER'] = authorization[0]
				969	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	970	if self.headers.get('content-type') is None:
				971	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	972	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	973	env['CONTENT_TYPE'] = self.headers['content-type']
				974	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	975	if length:
				976	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	977	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	978	if referer:
				979	env['HTTP_REFERER'] = referer
				980	accept = []
				981	for line in self.headers.getallmatchingheaders('accept'):
				982	if line[:1] in "\t\n\r ":
				983	accept.append(line.strip())
				984	else:
				985	accept = accept + line[7:].split(',')
				986	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	987	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	988	if ua:
				989	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	990	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	991	if co:
				992	env['HTTP_COOKIE'] = ', '.join(co)
				993	# XXX Other HTTP_* headers
				994	# Since we're setting the env in the parent, provide empty
				995	# values to override previously set values
				996	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				997	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				998	env.setdefault(k, "")
				999	os.environ.update(env)
				1000
				1001	self.send_response(200, "Script output follows")
				1002
				1003	decoded_query = query.replace('+', ' ')
				1004
				1005	if self.have_fork:
				1006	# Unix -- fork as we should
				1007	args = [script]
				1008	if '=' not in decoded_query:
				1009	args.append(decoded_query)
				1010	nobody = nobody_uid()
				1011	self.wfile.flush() # Always flush before forking
				1012	pid = os.fork()
				1013	if pid != 0:
				1014	# Parent
				1015	pid, sts = os.waitpid(pid, 0)
				1016	# throw away additional data [see bug #427345]
				1017	while select.select([self.rfile], [], [], 0)[0]:
				1018	if not self.rfile.read(1):
				1019	break
				1020	if sts:
				1021	self.log_error("CGI script exit status %#x", sts)
				1022	return
				1023	# Child
				1024	try:
				1025	try:
				1026	os.setuid(nobody)
				1027	except os.error:
				1028	pass
				1029	os.dup2(self.rfile.fileno(), 0)
				1030	os.dup2(self.wfile.fileno(), 1)
				1031	os.execve(scriptfile, args, os.environ)
				1032	except:
				1033	self.server.handle_error(self.request, self.client_address)
				1034	os._exit(127)
				1035
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1036	else:
				1037	# Non-Unix -- use subprocess
				1038	import subprocess
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1039	cmdline = scriptfile
				1040	if self.is_python(scriptfile):
				1041	interp = sys.executable
				1042	if interp.lower().endswith("w.exe"):
				1043	# On Windows, use python.exe, not pythonw.exe
				1044	interp = interp[:-5] + interp[-4:]
				1045	cmdline = "%s -u %s" % (interp, cmdline)
				1046	if '=' not in query and '"' not in query:
				1047	cmdline = '%s "%s"' % (cmdline, query)
				1048	self.log_message("command: %s", cmdline)
				1049	try:
				1050	nbytes = int(length)
				1051	except (TypeError, ValueError):
				1052	nbytes = 0
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1053	p = subprocess.Popen(cmdline,
				1054	stdin=subprocess.PIPE,
				1055	stdout=subprocess.PIPE,
				1056	stderr=subprocess.PIPE,
				1057	)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1058	if self.command.lower() == "post" and nbytes > 0:
				1059	data = self.rfile.read(nbytes)
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1060	else:
				1061	data = None
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1062	# throw away additional data [see bug #427345]
				1063	while select.select([self.rfile._sock], [], [], 0)[0]:
				1064	if not self.rfile._sock.recv(1):
				1065	break
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1066	stdout, stderr = p.communicate(data)
				1067	self.wfile.write(stdout)
				1068	if stderr:
				1069	self.log_error('%s', stderr)
				1070	status = p.returncode
				1071	if status:
				1072	self.log_error("CGI script exit status %#x", status)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1073	else:
				1074	self.log_message("CGI script exited OK")
				1075
				1076
				1077	def test(HandlerClass = BaseHTTPRequestHandler,
				1078	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1079	"""Test the HTTP request handler class.
				1080
				1081	This runs an HTTP server on port 8000 (or the first command line
				1082	argument).
				1083
				1084	"""
				1085
				1086	if sys.argv[1:]:
				1087	port = int(sys.argv[1])
				1088	else:
				1089	port = 8000
				1090	server_address = ('', port)
				1091
				1092	HandlerClass.protocol_version = protocol
				1093	httpd = ServerClass(server_address, HandlerClass)
				1094
				1095	sa = httpd.socket.getsockname()
				1096	print("Serving HTTP on", sa[0], "port", sa[1], "...")
				1097	httpd.serve_forever()
				1098
				1099
				1100	if __name__ == '__main__':
				1101	test(HandlerClass=BaseHTTPRequestHandler)
				1102	test(HandlerClass=SimpleHTTPRequestHandler)
				1103	test(HandlerClass=CGIHTTPRequestHandler)