Blame - Lib/http/server.py - platform/external/python/cpython3

blob: 098ad250caca9d8d7e6a8d184ea2ad3656f2d414 [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
				3	Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
				4	SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
				5	and CGIHTTPRequestHandler for GET, HEAD and POST served by CGI scripts.
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	16	subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	17
				18	In all cases, the implementation is intentionally naive -- all
				19	requests are executed synchronously.
				20
				21	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				22	-- it may execute arbitrary Python code or external programs.
				23
				24	Note that status code 200 is sent prior to execution of a CGI script, so
				25	scripts cannot send other status codes such as 302 (redirect).
				26
				27	XXX To do:
				28
				29	- log requests even later (to capture byte count)
				30	- log user-agent header and other interesting goodies
				31	- send error log to separate file
				32	"""
				33
				34
				35	# See also:
				36	#
				37	# HTTP Working Group T. Berners-Lee
				38	# INTERNET-DRAFT R. T. Fielding
				39	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				40	# Expires September 8, 1995 March 8, 1995
				41	#
				42	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				43	#
				44	# and
				45	#
				46	# Network Working Group R. Fielding
				47	# Request for Comments: 2616 et al
				48	# Obsoletes: 2068 June 1999
				49	# Category: Standards Track
				50	#
				51	# URL: http://www.faqs.org/rfcs/rfc2616.html
				52
				53	# Log files
				54	# ---------
				55	#
				56	# Here's a quote from the NCSA httpd docs about log file format.
				57	#
				58	# \| The logfile format is as follows. Each line consists of:
				59	# \|
				60	# \| host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
				61	# \|
				62	# \| host: Either the DNS name or the IP number of the remote client
				63	# \| rfc931: Any information returned by identd for this person,
				64	# \| - otherwise.
				65	# \| authuser: If user sent a userid for authentication, the user name,
				66	# \| - otherwise.
				67	# \| DD: Day
				68	# \| Mon: Month (calendar name)
				69	# \| YYYY: Year
				70	# \| hh: hour (24-hour format, the machine's timezone)
				71	# \| mm: minutes
				72	# \| ss: seconds
				73	# \| request: The first line of the HTTP request as sent by the client.
				74	# \| ddd: the status code returned by the server, - if not available.
				75	# \| bbbb: the total number of bytes sent,
				76	# \| not including the HTTP/1.0 header, - if not available
				77	# \|
				78	# \| You can determine the name of the file accessed through request.
				79	#
				80	# (Actually, the latter is only true if you know the server configuration
				81	# at the time the request was made!)
				82
				83	__version__ = "0.6"
				84
				85	__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				86
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	87	import cgi
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	88	import email.message
				89	import email.parser
Jeremy Hylton	914ab45	2009-03-27 17:16:06 +0000	[diff] [blame]	90	import http.client
				91	import io
				92	import mimetypes
				93	import os
				94	import posixpath
				95	import select
				96	import shutil
				97	import socket # For gethostbyaddr()
				98	import socketserver
				99	import sys
				100	import time
				101	import urllib.parse
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	102
				103	# Default error message template
				104	DEFAULT_ERROR_MESSAGE = """\
				105	<head>
				106	<title>Error response</title>
				107	</head>
				108	<body>
				109	<h1>Error response</h1>
				110	<p>Error code %(code)d.
				111	<p>Message: %(message)s.
				112	<p>Error code explanation: %(code)s = %(explain)s.
				113	</body>
				114	"""
				115
				116	DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				117
				118	def _quote_html(html):
				119	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				120
				121	class HTTPServer(socketserver.TCPServer):
				122
				123	allow_reuse_address = 1 # Seems to make sense in testing environment
				124
				125	def server_bind(self):
				126	"""Override server_bind to store the server name."""
				127	socketserver.TCPServer.server_bind(self)
				128	host, port = self.socket.getsockname()[:2]
				129	self.server_name = socket.getfqdn(host)
				130	self.server_port = port
				131
				132
				133	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				134
				135	"""HTTP request handler base class.
				136
				137	The following explanation of HTTP serves to guide you through the
				138	code as well as to expose any misunderstandings I may have about
				139	HTTP (so you don't need to read the code to figure out I'm wrong
				140	:-).
				141
				142	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				143	top of a reliable stream transport (e.g. TCP/IP). The protocol
				144	recognizes three parts to a request:
				145
				146	1. One line identifying the request type and path
				147	2. An optional set of RFC-822-style headers
				148	3. An optional data part
				149
				150	The headers and data are separated by a blank line.
				151
				152	The first line of the request has the form
				153
				154	<command> <path> <version>
				155
				156	where <command> is a (case-sensitive) keyword such as GET or POST,
				157	<path> is a string containing path information for the request,
				158	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				159	<path> is encoded using the URL encoding scheme (using %xx to signify
				160	the ASCII character with hex code xx).
				161
				162	The specification specifies that lines are separated by CRLF but
				163	for compatibility with the widest range of clients recommends
				164	servers also handle LF. Similarly, whitespace in the request line
				165	is treated sensibly (allowing multiple spaces between components
				166	and allowing trailing whitespace).
				167
				168	Similarly, for output, lines ought to be separated by CRLF pairs
				169	but most clients grok LF characters just fine.
				170
				171	If the first line of the request has the form
				172
				173	<command> <path>
				174
				175	(i.e. <version> is left out) then this is assumed to be an HTTP
				176	0.9 request; this form has no optional headers and data part and
				177	the reply consists of just the data.
				178
				179	The reply form of the HTTP 1.x protocol again has three parts:
				180
				181	1. One line giving the response code
				182	2. An optional set of RFC-822-style headers
				183	3. The data
				184
				185	Again, the headers and data are separated by a blank line.
				186
				187	The response code line has the form
				188
				189	<version> <responsecode> <responsestring>
				190
				191	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				192	<responsecode> is a 3-digit response code indicating success or
				193	failure of the request, and <responsestring> is an optional
				194	human-readable string explaining what the response code means.
				195
				196	This server parses the request and the headers, and then calls a
				197	function specific to the request type (<command>). Specifically,
				198	a request SPAM will be handled by a method do_SPAM(). If no
				199	such method exists the server sends an error response to the
				200	client. If it exists, it is called with no arguments:
				201
				202	do_SPAM()
				203
				204	Note that the request name is case sensitive (i.e. SPAM and spam
				205	are different requests).
				206
				207	The various request details are stored in instance variables:
				208
				209	- client_address is the client IP address in the form (host,
				210	port);
				211
				212	- command, path and version are the broken-down request line;
				213
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	214	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	215	class) containing the header information;
				216
				217	- rfile is a file object open for reading positioned at the
				218	start of the optional input data part;
				219
				220	- wfile is a file object open for writing.
				221
				222	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				223
				224	The first thing to be written must be the response line. Then
				225	follow 0 or more header lines, then a blank line, and then the
				226	actual data (if any). The meaning of the header lines depends on
				227	the command executed by the server; in most cases, when data is
				228	returned, there should be at least one header line of the form
				229
				230	Content-type: <type>/<subtype>
				231
				232	where <type> and <subtype> should be registered MIME types,
				233	e.g. "text/html" or "text/plain".
				234
				235	"""
				236
				237	# The Python system version, truncated to its first component.
				238	sys_version = "Python/" + sys.version.split()[0]
				239
				240	# The server software version. You may want to override this.
				241	# The format is multiple whitespace-separated strings,
				242	# where each string is of the form name[/version].
				243	server_version = "BaseHTTP/" + __version__
				244
				245	error_message_format = DEFAULT_ERROR_MESSAGE
				246	error_content_type = DEFAULT_ERROR_CONTENT_TYPE
				247
				248	# The default request version. This only affects responses up until
				249	# the point where the request line is parsed, so it mainly decides what
				250	# the client gets back when sending a malformed request line.
				251	# Most web servers default to HTTP 0.9, i.e. don't send a status line.
				252	default_request_version = "HTTP/0.9"
				253
				254	def parse_request(self):
				255	"""Parse a request (internal).
				256
				257	The request should be stored in self.raw_requestline; the results
				258	are in self.command, self.path, self.request_version and
				259	self.headers.
				260
				261	Return True for success, False for failure; on failure, an
				262	error is sent back.
				263
				264	"""
				265	self.command = None # set in case of error on the first line
				266	self.request_version = version = self.default_request_version
				267	self.close_connection = 1
				268	requestline = str(self.raw_requestline, 'iso-8859-1')
				269	if requestline[-2:] == '\r\n':
				270	requestline = requestline[:-2]
				271	elif requestline[-1:] == '\n':
				272	requestline = requestline[:-1]
				273	self.requestline = requestline
				274	words = requestline.split()
				275	if len(words) == 3:
				276	[command, path, version] = words
				277	if version[:5] != 'HTTP/':
				278	self.send_error(400, "Bad request version (%r)" % version)
				279	return False
				280	try:
				281	base_version_number = version.split('/', 1)[1]
				282	version_number = base_version_number.split(".")
				283	# RFC 2145 section 3.1 says there can be only one "." and
				284	# - major and minor numbers MUST be treated as
				285	# separate integers;
				286	# - HTTP/2.4 is a lower version than HTTP/2.13, which in
				287	# turn is lower than HTTP/12.3;
				288	# - Leading zeros MUST be ignored by recipients.
				289	if len(version_number) != 2:
				290	raise ValueError
				291	version_number = int(version_number[0]), int(version_number[1])
				292	except (ValueError, IndexError):
				293	self.send_error(400, "Bad request version (%r)" % version)
				294	return False
				295	if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
				296	self.close_connection = 0
				297	if version_number >= (2, 0):
				298	self.send_error(505,
				299	"Invalid HTTP Version (%s)" % base_version_number)
				300	return False
				301	elif len(words) == 2:
				302	[command, path] = words
				303	self.close_connection = 1
				304	if command != 'GET':
				305	self.send_error(400,
				306	"Bad HTTP/0.9 request type (%r)" % command)
				307	return False
				308	elif not words:
				309	return False
				310	else:
				311	self.send_error(400, "Bad request syntax (%r)" % requestline)
				312	return False
				313	self.command, self.path, self.request_version = command, path, version
				314
				315	# Examine the headers and look for a Connection directive.
Jeremy Hylton	98eb6c2	2009-03-27 18:31:36 +0000	[diff] [blame]	316	self.headers = http.client.parse_headers(self.rfile,
				317	_class=self.MessageClass)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	318
				319	conntype = self.headers.get('Connection', "")
				320	if conntype.lower() == 'close':
				321	self.close_connection = 1
				322	elif (conntype.lower() == 'keep-alive' and
				323	self.protocol_version >= "HTTP/1.1"):
				324	self.close_connection = 0
				325	return True
				326
				327	def handle_one_request(self):
				328	"""Handle a single HTTP request.
				329
				330	You normally don't need to override this method; see the class
				331	__doc__ string for information on how to handle specific HTTP
				332	commands such as GET and POST.
				333
				334	"""
Kristján Valur Jónsson	985fc6a	2009-07-01 10:01:31 +0000	[diff] [blame]	335	try:
				336	self.raw_requestline = self.rfile.readline()
				337	if not self.raw_requestline:
				338	self.close_connection = 1
				339	return
				340	if not self.parse_request():
				341	# An error code has been sent, just exit
				342	return
				343	mname = 'do_' + self.command
				344	if not hasattr(self, mname):
				345	self.send_error(501, "Unsupported method (%r)" % self.command)
				346	return
				347	method = getattr(self, mname)
				348	method()
				349	self.wfile.flush() #actually send the response if not already done.
				350	except socket.timeout as e:
				351	#a read or a write timed out. Discard this connection
				352	self.log_error("Request timed out: %r", e)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	353	self.close_connection = 1
				354	return
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	355
				356	def handle(self):
				357	"""Handle multiple requests if necessary."""
				358	self.close_connection = 1
				359
				360	self.handle_one_request()
				361	while not self.close_connection:
				362	self.handle_one_request()
				363
    def send_error(self, code, message=None):
        """Log an error and send a complete error reply.

        code is the numeric HTTP status.  message is the detail text used
        for both the status line and the HTML page; when omitted, the short
        message registered for code in self.responses is used ('???' if the
        code is not in that table).

        This writes the status line and headers itself, so it must be called
        before any other output has been generated.  A 'Connection: close'
        header is always sent.  The HTML body is left out for HEAD requests,
        for codes below 200, and for 204 and 304.

        """
        short_text, long_text = self.responses.get(code, ('???', '???'))
        if message is None:
            message = short_text
        self.log_error("code %d, message %s", code, message)
        # The message is escaped with _quote_html to prevent Cross Site
        # Scripting attacks (see bug #1100201).
        fields = {
            'code': code,
            'message': _quote_html(message),
            'explain': long_text,
        }
        content = self.error_message_format % fields
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command == 'HEAD' or code < 200 or code in (204, 304):
            return
        self.wfile.write(content.encode('UTF-8', 'replace'))
				394
				395	def send_response(self, code, message=None):
				396	"""Send the response header and log the response code.
				397
				398	Also send two standard headers with the server software
				399	version and the current date.
				400
				401	"""
				402	self.log_request(code)
				403	if message is None:
				404	if code in self.responses:
				405	message = self.responses[code][0]
				406	else:
				407	message = ''
				408	if self.request_version != 'HTTP/0.9':
				409	self.wfile.write(("%s %d %s\r\n" %
				410	(self.protocol_version, code, message)).encode('ASCII', 'strict'))
				411	# print (self.protocol_version, code, message)
				412	self.send_header('Server', self.version_string())
				413	self.send_header('Date', self.date_time_string())
				414
				415	def send_header(self, keyword, value):
				416	"""Send a MIME header."""
				417	if self.request_version != 'HTTP/0.9':
				418	self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))
				419
				420	if keyword.lower() == 'connection':
				421	if value.lower() == 'close':
				422	self.close_connection = 1
				423	elif value.lower() == 'keep-alive':
				424	self.close_connection = 0
				425
				426	def end_headers(self):
				427	"""Send the blank line ending the MIME headers."""
				428	if self.request_version != 'HTTP/0.9':
				429	self.wfile.write(b"\r\n")
				430
				431	def log_request(self, code='-', size='-'):
				432	"""Log an accepted request.
				433
				434	This is called by send_response().
				435
				436	"""
				437
				438	self.log_message('"%s" %s %s',
				439	self.requestline, str(code), str(size))
				440
				441	def log_error(self, format, *args):
				442	"""Log an error.
				443
				444	This is called when a request cannot be fulfilled. By
				445	default it passes the message on to log_message().
				446
				447	Arguments are the same as for log_message().
				448
				449	XXX This should go to the separate error log.
				450
				451	"""
				452
				453	self.log_message(format, *args)
				454
				455	def log_message(self, format, *args):
				456	"""Log an arbitrary message.
				457
				458	This is used by all other logging functions. Override
				459	it if you have specific logging wishes.
				460
				461	The first argument, FORMAT, is a format string for the
				462	message to be logged. If the format string contains
				463	any % escapes requiring parameters, they should be
				464	specified as subsequent arguments (it's just like
				465	printf!).
				466
				467	The client host and current date/time are prefixed to
				468	every message.
				469
				470	"""
				471
				472	sys.stderr.write("%s - - [%s] %s\n" %
				473	(self.address_string(),
				474	self.log_date_time_string(),
				475	format%args))
				476
				477	def version_string(self):
				478	"""Return the server software version string."""
				479	return self.server_version + ' ' + self.sys_version
				480
				481	def date_time_string(self, timestamp=None):
				482	"""Return the current date and time formatted for a message header."""
				483	if timestamp is None:
				484	timestamp = time.time()
				485	year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
				486	s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
				487	self.weekdayname[wd],
				488	day, self.monthname[month], year,
				489	hh, mm, ss)
				490	return s
				491
				492	def log_date_time_string(self):
				493	"""Return the current time formatted for logging."""
				494	now = time.time()
				495	year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
				496	s = "%02d/%3s/%04d %02d:%02d:%02d" % (
				497	day, self.monthname[month], year, hh, mm, ss)
				498	return s
				499
				500	weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
				501
				502	monthname = [None,
				503	'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				504	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				505
    def address_string(self):
        """Return the client address formatted for logging.

        The host part of self.client_address is resolved with
        socket.getfqdn(); the port is not part of the result.

        """
        client_host, _client_port = self.client_address[:2]
        return socket.getfqdn(client_host)
				516
				517	# Essentially static class variables
				518
				519	# The version of the HTTP protocol we support.
				520	# Set this to HTTP/1.1 to enable automatic keepalive
				521	protocol_version = "HTTP/1.0"
				522
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	523	# MessageClass used to parse headers
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	524	MessageClass = http.client.HTTPMessage
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	525
				526	# Table mapping response codes to messages; entries have the
				527	# form {code: (shortmessage, longmessage)}.
				528	# See RFC 2616.
				529	responses = {
				530	100: ('Continue', 'Request received, please continue'),
				531	101: ('Switching Protocols',
				532	'Switching to new protocol; obey Upgrade header'),
				533
				534	200: ('OK', 'Request fulfilled, document follows'),
				535	201: ('Created', 'Document created, URL follows'),
				536	202: ('Accepted',
				537	'Request accepted, processing continues off-line'),
				538	203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
				539	204: ('No Content', 'Request fulfilled, nothing follows'),
				540	205: ('Reset Content', 'Clear input form for further input.'),
				541	206: ('Partial Content', 'Partial content follows.'),
				542
				543	300: ('Multiple Choices',
				544	'Object has several resources -- see URI list'),
				545	301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
				546	302: ('Found', 'Object moved temporarily -- see URI list'),
				547	303: ('See Other', 'Object moved -- see Method and URL list'),
				548	304: ('Not Modified',
				549	'Document has not changed since given time'),
				550	305: ('Use Proxy',
				551	'You must use proxy specified in Location to access this '
				552	'resource.'),
				553	307: ('Temporary Redirect',
				554	'Object moved temporarily -- see URI list'),
				555
				556	400: ('Bad Request',
				557	'Bad request syntax or unsupported method'),
				558	401: ('Unauthorized',
				559	'No permission -- see authorization schemes'),
				560	402: ('Payment Required',
				561	'No payment -- see charging schemes'),
				562	403: ('Forbidden',
				563	'Request forbidden -- authorization will not help'),
				564	404: ('Not Found', 'Nothing matches the given URI'),
				565	405: ('Method Not Allowed',
Senthil Kumaran	7aa2621	2010-02-22 11:00:50 +0000	[diff] [blame]	566	'Specified method is invalid for this resource.'),
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	567	406: ('Not Acceptable', 'URI not available in preferred format.'),
				568	407: ('Proxy Authentication Required', 'You must authenticate with '
				569	'this proxy before proceeding.'),
				570	408: ('Request Timeout', 'Request timed out; try again later.'),
				571	409: ('Conflict', 'Request conflict.'),
				572	410: ('Gone',
				573	'URI no longer exists and has been permanently removed.'),
				574	411: ('Length Required', 'Client must specify Content-Length.'),
				575	412: ('Precondition Failed', 'Precondition in headers is false.'),
				576	413: ('Request Entity Too Large', 'Entity is too large.'),
				577	414: ('Request-URI Too Long', 'URI is too long.'),
				578	415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
				579	416: ('Requested Range Not Satisfiable',
				580	'Cannot satisfy request range.'),
				581	417: ('Expectation Failed',
				582	'Expect condition could not be satisfied.'),
				583
				584	500: ('Internal Server Error', 'Server got itself in trouble'),
				585	501: ('Not Implemented',
				586	'Server does not support this operation'),
				587	502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
				588	503: ('Service Unavailable',
				589	'The server cannot process the request due to a high load'),
				590	504: ('Gateway Timeout',
				591	'The gateway server did not receive a timely response'),
				592	505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
				593	}
				594
				595
				596	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				597
				598	"""Simple HTTP request handler with GET and HEAD commands.
				599
				600	This serves files from the current directory and any of its
				601	subdirectories. The MIME type for files is determined by
				602	calling the .guess_type() method.
				603
				604	The GET and HEAD requests are identical except that the HEAD
				605	request omits the actual contents of the file.
				606
				607	"""
				608
				609	server_version = "SimpleHTTP/" + __version__
				610
				611	def do_GET(self):
				612	"""Serve a GET request."""
				613	f = self.send_head()
				614	if f:
				615	self.copyfile(f, self.wfile)
				616	f.close()
				617
				618	def do_HEAD(self):
				619	"""Serve a HEAD request."""
				620	f = self.send_head()
				621	if f:
				622	f.close()
				623
				624	def send_head(self):
				625	"""Common code for GET and HEAD commands.
				626
				627	This sends the response code and MIME headers.
				628
				629	Return value is either a file object (which has to be copied
				630	to the outputfile by the caller unless the command was HEAD,
				631	and must be closed by the caller under all circumstances), or
				632	None, in which case the caller has nothing further to do.
				633
				634	"""
				635	path = self.translate_path(self.path)
				636	f = None
				637	if os.path.isdir(path):
				638	if not self.path.endswith('/'):
				639	# redirect browser - doing basically what apache does
				640	self.send_response(301)
				641	self.send_header("Location", self.path + "/")
				642	self.end_headers()
				643	return None
				644	for index in "index.html", "index.htm":
				645	index = os.path.join(path, index)
				646	if os.path.exists(index):
				647	path = index
				648	break
				649	else:
				650	return self.list_directory(path)
				651	ctype = self.guess_type(path)
				652	try:
				653	f = open(path, 'rb')
				654	except IOError:
				655	self.send_error(404, "File not found")
				656	return None
				657	self.send_response(200)
				658	self.send_header("Content-type", ctype)
				659	fs = os.fstat(f.fileno())
				660	self.send_header("Content-Length", str(fs[6]))
				661	self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
				662	self.end_headers()
				663	return f
				664
				665	def list_directory(self, path):
				666	"""Helper to produce a directory listing (absent index.html).
				667
				668	Return value is either a file object, or None (indicating an
				669	error). In either case, the headers are sent, making the
				670	interface the same as for send_head().
				671
				672	"""
				673	try:
				674	list = os.listdir(path)
				675	except os.error:
				676	self.send_error(404, "No permission to list directory")
				677	return None
				678	list.sort(key=lambda a: a.lower())
				679	r = []
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	680	displaypath = cgi.escape(urllib.parse.unquote(self.path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	681	r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
				682	r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
				683	r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
				684	r.append("<hr>\n<ul>\n")
				685	for name in list:
				686	fullname = os.path.join(path, name)
				687	displayname = linkname = name
				688	# Append / for directories or @ for symbolic links
				689	if os.path.isdir(fullname):
				690	displayname = name + "/"
				691	linkname = name + "/"
				692	if os.path.islink(fullname):
				693	displayname = name + "@"
				694	# Note: a link to a directory displays with @ and links with /
				695	r.append('<li><a href="%s">%s</a>\n'
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	696	% (urllib.parse.quote(linkname), cgi.escape(displayname)))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	697	r.append("</ul>\n<hr>\n</body>\n</html>\n")
				698	enc = sys.getfilesystemencoding()
				699	encoded = ''.join(r).encode(enc)
				700	f = io.BytesIO()
				701	f.write(encoded)
				702	f.seek(0)
				703	self.send_response(200)
				704	self.send_header("Content-type", "text/html; charset=%s" % enc)
				705	self.send_header("Content-Length", str(len(encoded)))
				706	self.end_headers()
				707	return f
				708
				709	def translate_path(self, path):
				710	"""Translate a /-separated PATH to the local filename syntax.
				711
				712	Components that mean special things to the local file system
				713	(e.g. drive or directory names) are ignored. (XXX They should
				714	probably be diagnosed.)
				715
				716	"""
				717	# abandon query parameters
				718	path = path.split('?',1)[0]
				719	path = path.split('#',1)[0]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	720	path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	721	words = path.split('/')
				722	words = filter(None, words)
				723	path = os.getcwd()
				724	for word in words:
				725	drive, word = os.path.splitdrive(word)
				726	head, word = os.path.split(word)
				727	if word in (os.curdir, os.pardir): continue
				728	path = os.path.join(path, word)
				729	return path
				730
				731	def copyfile(self, source, outputfile):
				732	"""Copy all data between two file objects.
				733
				734	The SOURCE argument is a file object open for reading
				735	(or anything with a read() method) and the DESTINATION
				736	argument is a file object open for writing (or
				737	anything with a write() method).
				738
				739	The only reason for overriding this would be to change
				740	the block size or perhaps to replace newlines by CRLF
				741	-- note however that this the default server uses this
				742	to copy binary data as well.
				743
				744	"""
				745	shutil.copyfileobj(source, outputfile)
				746
				747	def guess_type(self, path):
				748	"""Guess the type of a file.
				749
				750	Argument is a PATH (a filename).
				751
				752	Return value is a string of the form type/subtype,
				753	usable for a MIME Content-type header.
				754
				755	The default implementation looks the file's extension
				756	up in the table self.extensions_map, using application/octet-stream
				757	as a default; however it would be permissible (if
				758	slow) to look inside the data to make a better guess.
				759
				760	"""
				761
				762	base, ext = posixpath.splitext(path)
				763	if ext in self.extensions_map:
				764	return self.extensions_map[ext]
				765	ext = ext.lower()
				766	if ext in self.extensions_map:
				767	return self.extensions_map[ext]
				768	else:
				769	return self.extensions_map['']
				770
				771	if not mimetypes.inited:
				772	mimetypes.init() # try to read system mime.types
				773	extensions_map = mimetypes.types_map.copy()
				774	extensions_map.update({
				775	'': 'application/octet-stream', # Default
				776	'.py': 'text/plain',
				777	'.c': 'text/plain',
				778	'.h': 'text/plain',
				779	})
				780
				781
				782	# Utilities for CGIHTTPRequestHandler
				783
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	784	# TODO(gregory.p.smith): Move this into an appropriate library.
				785	def _url_collapse_path_split(path):
				786	"""
				787	Given a URL path, remove extra '/'s and '.' path elements and collapse
				788	any '..' references.
				789
				790	Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
				791
				792	Returns: A tuple of (head, tail) where tail is everything after the final /
				793	and head is everything before it. Head will always start with a '/' and,
				794	if it contains anything else, never have a trailing '/'.
				795
				796	Raises: IndexError if too many '..' occur within the path.
				797	"""
				798	# Similar to os.path.split(os.path.normpath(path)) but specific to URL
				799	# path semantics rather than local operating system semantics.
				800	path_parts = []
				801	for part in path.split('/'):
				802	if part == '.':
				803	path_parts.append('')
				804	else:
				805	path_parts.append(part)
				806	# Filter out blank non trailing parts before consuming the '..'.
				807	path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
				808	if path_parts:
				809	tail_part = path_parts.pop()
				810	else:
				811	tail_part = ''
				812	head_parts = []
				813	for part in path_parts:
				814	if part == '..':
				815	head_parts.pop()
				816	else:
				817	head_parts.append(part)
				818	if tail_part and tail_part == '..':
				819	head_parts.pop()
				820	tail_part = ''
				821	return ('/' + '/'.join(head_parts), tail_part)
				822
				823
# Cached uid of the 'nobody' user; filled in lazily by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the
    uid of the 'nobody' account is returned, or, if no such account
    exists, one more than the largest uid in the password database.
    The result is remembered in the module-level ``nobody`` variable.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # No 'nobody' account: pick a uid above every known one.
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
				840
				841
				842	def executable(path):
				843	"""Test for executable file."""
				844	try:
				845	st = os.stat(path)
				846	except os.error:
				847	return False
				848	return st.st_mode & 0o111 != 0
				849
				850
				851	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				852
				853	"""Complete HTTP server with GET, HEAD and POST commands.
				854
				855	GET and HEAD also support running CGI scripts.
				856
				857	The POST command is only implemented for CGI scripts.
				858
				859	"""
				860
				861	# Determine platform specifics
				862	have_fork = hasattr(os, 'fork')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	863
				864	# Make rfile unbuffered -- we need to read one line and then pass
				865	# the rest to a subprocess, so we can't use buffered input.
				866	rbufsize = 0
				867
				868	def do_POST(self):
				869	"""Serve a POST request.
				870
				871	This is only implemented for CGI scripts.
				872
				873	"""
				874
				875	if self.is_cgi():
				876	self.run_cgi()
				877	else:
				878	self.send_error(501, "Can only POST to CGI scripts")
				879
				880	def send_head(self):
				881	"""Version of send_head that support CGI scripts"""
				882	if self.is_cgi():
				883	return self.run_cgi()
				884	else:
				885	return SimpleHTTPRequestHandler.send_head(self)
				886
				887	def is_cgi(self):
				888	"""Test whether self.path corresponds to a CGI script.
				889
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	890	Returns True and updates the cgi_info attribute to the tuple
				891	(dir, rest) if self.path requires running a CGI script.
				892	Returns False otherwise.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	893
Benjamin Peterson	a7deeee	2009-05-08 20:54:42 +0000	[diff] [blame]	894	If any exception is raised, the caller should assume that
				895	self.path was rejected as invalid and act accordingly.
				896
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	897	The default implementation tests whether the normalized url
				898	path begins with one of the strings in self.cgi_directories
				899	(and the next character is a '/' or the end of the string).
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	900
				901	"""
				902
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	903	splitpath = _url_collapse_path_split(self.path)
				904	if splitpath[0] in self.cgi_directories:
				905	self.cgi_info = splitpath
				906	return True
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	907	return False
				908
				909	cgi_directories = ['/cgi-bin', '/htbin']
				910
				911	def is_executable(self, path):
				912	"""Test whether argument path is an executable file."""
				913	return executable(path)
				914
				915	def is_python(self, path):
				916	"""Test whether argument path is a Python script."""
				917	head, tail = os.path.splitext(path)
				918	return tail.lower() in (".py", ".pyw")
				919
				920	def run_cgi(self):
				921	"""Execute a CGI script."""
				922	path = self.path
				923	dir, rest = self.cgi_info
				924
				925	i = path.find('/', len(dir) + 1)
				926	while i >= 0:
				927	nextdir = path[:i]
				928	nextrest = path[i+1:]
				929
				930	scriptdir = self.translate_path(nextdir)
				931	if os.path.isdir(scriptdir):
				932	dir, rest = nextdir, nextrest
				933	i = path.find('/', len(dir) + 1)
				934	else:
				935	break
				936
				937	# find an explicit query string, if present.
				938	i = rest.rfind('?')
				939	if i >= 0:
				940	rest, query = rest[:i], rest[i+1:]
				941	else:
				942	query = ''
				943
				944	# dissect the part after the directory name into a script name &
				945	# a possible additional path, to be stored in PATH_INFO.
				946	i = rest.find('/')
				947	if i >= 0:
				948	script, rest = rest[:i], rest[i:]
				949	else:
				950	script, rest = rest, ''
				951
				952	scriptname = dir + '/' + script
				953	scriptfile = self.translate_path(scriptname)
				954	if not os.path.exists(scriptfile):
				955	self.send_error(404, "No such CGI script (%r)" % scriptname)
				956	return
				957	if not os.path.isfile(scriptfile):
				958	self.send_error(403, "CGI script is not a plain file (%r)" %
				959	scriptname)
				960	return
				961	ispy = self.is_python(scriptname)
				962	if not ispy:
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	963	if not self.is_executable(scriptfile):
				964	self.send_error(403, "CGI script is not executable (%r)" %
				965	scriptname)
				966	return
				967
				968	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				969	# XXX Much of the following could be prepared ahead of time!
				970	env = {}
				971	env['SERVER_SOFTWARE'] = self.version_string()
				972	env['SERVER_NAME'] = self.server.server_name
				973	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				974	env['SERVER_PROTOCOL'] = self.protocol_version
				975	env['SERVER_PORT'] = str(self.server.server_port)
				976	env['REQUEST_METHOD'] = self.command
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	977	uqrest = urllib.parse.unquote(rest)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	978	env['PATH_INFO'] = uqrest
				979	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				980	env['SCRIPT_NAME'] = scriptname
				981	if query:
				982	env['QUERY_STRING'] = query
				983	host = self.address_string()
				984	if host != self.client_address[0]:
				985	env['REMOTE_HOST'] = host
				986	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	987	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	988	if authorization:
				989	authorization = authorization.split()
				990	if len(authorization) == 2:
				991	import base64, binascii
				992	env['AUTH_TYPE'] = authorization[0]
				993	if authorization[0].lower() == "basic":
				994	try:
				995	authorization = authorization[1].encode('ascii')
Georg Brandl	706824f	2009-06-04 09:42:55 +0000	[diff] [blame]	996	authorization = base64.decodebytes(authorization).\
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	997	decode('ascii')
				998	except (binascii.Error, UnicodeError):
				999	pass
				1000	else:
				1001	authorization = authorization.split(':')
				1002	if len(authorization) == 2:
				1003	env['REMOTE_USER'] = authorization[0]
				1004	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1005	if self.headers.get('content-type') is None:
				1006	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1007	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1008	env['CONTENT_TYPE'] = self.headers['content-type']
				1009	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1010	if length:
				1011	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1012	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1013	if referer:
				1014	env['HTTP_REFERER'] = referer
				1015	accept = []
				1016	for line in self.headers.getallmatchingheaders('accept'):
				1017	if line[:1] in "\t\n\r ":
				1018	accept.append(line.strip())
				1019	else:
				1020	accept = accept + line[7:].split(',')
				1021	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1022	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1023	if ua:
				1024	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1025	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	62e2ca2	2010-07-31 21:54:24 +0000	[diff] [blame]	1026	cookie_str = ', '.join(co)
				1027	if cookie_str:
				1028	env['HTTP_COOKIE'] = cookie_str
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1029	# XXX Other HTTP_* headers
				1030	# Since we're setting the env in the parent, provide empty
				1031	# values to override previously set values
				1032	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				1033	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				1034	env.setdefault(k, "")
				1035	os.environ.update(env)
				1036
				1037	self.send_response(200, "Script output follows")
				1038
				1039	decoded_query = query.replace('+', ' ')
				1040
				1041	if self.have_fork:
				1042	# Unix -- fork as we should
				1043	args = [script]
				1044	if '=' not in decoded_query:
				1045	args.append(decoded_query)
				1046	nobody = nobody_uid()
				1047	self.wfile.flush() # Always flush before forking
				1048	pid = os.fork()
				1049	if pid != 0:
				1050	# Parent
				1051	pid, sts = os.waitpid(pid, 0)
				1052	# throw away additional data [see bug #427345]
				1053	while select.select([self.rfile], [], [], 0)[0]:
				1054	if not self.rfile.read(1):
				1055	break
				1056	if sts:
				1057	self.log_error("CGI script exit status %#x", sts)
				1058	return
				1059	# Child
				1060	try:
				1061	try:
				1062	os.setuid(nobody)
				1063	except os.error:
				1064	pass
				1065	os.dup2(self.rfile.fileno(), 0)
				1066	os.dup2(self.wfile.fileno(), 1)
				1067	os.execve(scriptfile, args, os.environ)
				1068	except:
				1069	self.server.handle_error(self.request, self.client_address)
				1070	os._exit(127)
				1071
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1072	else:
				1073	# Non-Unix -- use subprocess
				1074	import subprocess
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1075	cmdline = [scriptfile]
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1076	if self.is_python(scriptfile):
				1077	interp = sys.executable
				1078	if interp.lower().endswith("w.exe"):
				1079	# On Windows, use python.exe, not pythonw.exe
				1080	interp = interp[:-5] + interp[-4:]
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1081	cmdline = [interp, '-u'] + cmdline
				1082	if '=' not in query:
				1083	cmdline.append(query)
				1084	self.log_message("command: %s", subprocess.list2cmdline(cmdline))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1085	try:
				1086	nbytes = int(length)
				1087	except (TypeError, ValueError):
				1088	nbytes = 0
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1089	p = subprocess.Popen(cmdline,
				1090	stdin=subprocess.PIPE,
				1091	stdout=subprocess.PIPE,
Senthil Kumaran	e29cd16	2009-11-11 04:17:53 +0000	[diff] [blame]	1092	stderr=subprocess.PIPE
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1093	)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1094	if self.command.lower() == "post" and nbytes > 0:
				1095	data = self.rfile.read(nbytes)
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1096	else:
				1097	data = None
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1098	# throw away additional data [see bug #427345]
				1099	while select.select([self.rfile._sock], [], [], 0)[0]:
				1100	if not self.rfile._sock.recv(1):
				1101	break
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1102	stdout, stderr = p.communicate(data)
				1103	self.wfile.write(stdout)
				1104	if stderr:
				1105	self.log_error('%s', stderr)
				1106	status = p.returncode
				1107	if status:
				1108	self.log_error("CGI script exit status %#x", status)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1109	else:
				1110	self.log_message("CGI script exited OK")
				1111
				1112
				1113	def test(HandlerClass = BaseHTTPRequestHandler,
				1114	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1115	"""Test the HTTP request handler class.
				1116
				1117	This runs an HTTP server on port 8000 (or the first command line
				1118	argument).
				1119
				1120	"""
				1121
				1122	if sys.argv[1:]:
				1123	port = int(sys.argv[1])
				1124	else:
				1125	port = 8000
				1126	server_address = ('', port)
				1127
				1128	HandlerClass.protocol_version = protocol
				1129	httpd = ServerClass(server_address, HandlerClass)
				1130
				1131	sa = httpd.socket.getsockname()
				1132	print("Serving HTTP on", sa[0], "port", sa[1], "...")
Alexandre Vassalotti	b5292a2	2009-04-03 07:16:55 +0000	[diff] [blame]	1133	try:
				1134	httpd.serve_forever()
				1135	except KeyboardInterrupt:
				1136	print("\nKeyboard interrupt received, exiting.")
				1137	httpd.server_close()
				1138	sys.exit(0)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1139
if __name__ == '__main__':
    # Executed as a script: start the demo server from test(), using
    # SimpleHTTPRequestHandler as the request handler class.
    test(HandlerClass=SimpleHTTPRequestHandler)