Blame - Lib/http/server.py - platform/external/python/cpython3

blob: 5ac6c0d204e3cf8429ec514b2d1328f0f757185a [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
				3	Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
				4	SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
				5	and CGIHTTPRequestHandler for CGI scripts.
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	16	subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	17
				18	In all cases, the implementation is intentionally naive -- all
				19	requests are executed synchronously.
				20
				21	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				22	-- it may execute arbitrary Python code or external programs.
				23
				24	Note that status code 200 is sent prior to execution of a CGI script, so
				25	scripts cannot send other status codes such as 302 (redirect).
				26
				27	XXX To do:
				28
				29	- log requests even later (to capture byte count)
				30	- log user-agent header and other interesting goodies
				31	- send error log to separate file
				32	"""
				33
				34
				35	# See also:
				36	#
				37	# HTTP Working Group T. Berners-Lee
				38	# INTERNET-DRAFT R. T. Fielding
				39	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				40	# Expires September 8, 1995 March 8, 1995
				41	#
				42	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				43	#
				44	# and
				45	#
				46	# Network Working Group R. Fielding
				47	# Request for Comments: 2616 et al
				48	# Obsoletes: 2068 June 1999
				49	# Category: Standards Track
				50	#
				51	# URL: http://www.faqs.org/rfcs/rfc2616.html
				52
				53	# Log files
				54	# ---------
				55	#
				56	# Here's a quote from the NCSA httpd docs about log file format.
				57	#
				58	# \| The logfile format is as follows. Each line consists of:
				59	# \|
				60	# \| host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
				61	# \|
				62	# \| host: Either the DNS name or the IP number of the remote client
				63	# \| rfc931: Any information returned by identd for this person,
				64	# \| - otherwise.
				65	# \| authuser: If user sent a userid for authentication, the user name,
				66	# \| - otherwise.
				67	# \| DD: Day
				68	# \| Mon: Month (calendar name)
				69	# \| YYYY: Year
				70	# \| hh: hour (24-hour format, the machine's timezone)
				71	# \| mm: minutes
				72	# \| ss: seconds
				73	# \| request: The first line of the HTTP request as sent by the client.
				74	# \| ddd: the status code returned by the server, - if not available.
				75	# \| bbbb: the total number of bytes sent,
				76	# \| not including the HTTP/1.0 header, - if not available
				77	# \|
				78	# \| You can determine the name of the file accessed through request.
				79	#
				80	# (Actually, the latter is only true if you know the server configuration
				81	# at the time the request was made!)
				82
				83	__version__ = "0.6"
				84
				85	__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				86
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	87	import cgi
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	88	import email.message
				89	import email.parser
Jeremy Hylton	914ab45	2009-03-27 17:16:06 +0000	[diff] [blame]	90	import http.client
				91	import io
				92	import mimetypes
				93	import os
				94	import posixpath
				95	import select
				96	import shutil
				97	import socket # For gethostbyaddr()
				98	import socketserver
				99	import sys
				100	import time
				101	import urllib.parse
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame]	102	import copy
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	103
				104	# Default error message template
				105	DEFAULT_ERROR_MESSAGE = """\
				106	<head>
				107	<title>Error response</title>
				108	</head>
				109	<body>
				110	<h1>Error response</h1>
				111	<p>Error code %(code)d.
				112	<p>Message: %(message)s.
				113	<p>Error code explanation: %(code)s = %(explain)s.
				114	</body>
				115	"""
				116
				117	DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				118
				119	def _quote_html(html):
				120	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				121
				122	class HTTPServer(socketserver.TCPServer):
				123
				124	allow_reuse_address = 1 # Seems to make sense in testing environment
				125
				126	def server_bind(self):
				127	"""Override server_bind to store the server name."""
				128	socketserver.TCPServer.server_bind(self)
				129	host, port = self.socket.getsockname()[:2]
				130	self.server_name = socket.getfqdn(host)
				131	self.server_port = port
				132
				133
				134	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				135
				136	"""HTTP request handler base class.
				137
				138	The following explanation of HTTP serves to guide you through the
				139	code as well as to expose any misunderstandings I may have about
				140	HTTP (so you don't need to read the code to figure out I'm wrong
				141	:-).
				142
				143	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				144	top of a reliable stream transport (e.g. TCP/IP). The protocol
				145	recognizes three parts to a request:
				146
				147	1. One line identifying the request type and path
				148	2. An optional set of RFC-822-style headers
				149	3. An optional data part
				150
				151	The headers and data are separated by a blank line.
				152
				153	The first line of the request has the form
				154
				155	<command> <path> <version>
				156
				157	where <command> is a (case-sensitive) keyword such as GET or POST,
				158	<path> is a string containing path information for the request,
				159	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				160	<path> is encoded using the URL encoding scheme (using %xx to signify
				161	the ASCII character with hex code xx).
				162
				163	The specification specifies that lines are separated by CRLF but
				164	for compatibility with the widest range of clients recommends
				165	servers also handle LF. Similarly, whitespace in the request line
				166	is treated sensibly (allowing multiple spaces between components
				167	and allowing trailing whitespace).
				168
				169	Similarly, for output, lines ought to be separated by CRLF pairs
				170	but most clients grok LF characters just fine.
				171
				172	If the first line of the request has the form
				173
				174	<command> <path>
				175
				176	(i.e. <version> is left out) then this is assumed to be an HTTP
				177	0.9 request; this form has no optional headers and data part and
				178	the reply consists of just the data.
				179
				180	The reply form of the HTTP 1.x protocol again has three parts:
				181
				182	1. One line giving the response code
				183	2. An optional set of RFC-822-style headers
				184	3. The data
				185
				186	Again, the headers and data are separated by a blank line.
				187
				188	The response code line has the form
				189
				190	<version> <responsecode> <responsestring>
				191
				192	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				193	<responsecode> is a 3-digit response code indicating success or
				194	failure of the request, and <responsestring> is an optional
				195	human-readable string explaining what the response code means.
				196
				197	This server parses the request and the headers, and then calls a
				198	function specific to the request type (<command>). Specifically,
				199	a request SPAM will be handled by a method do_SPAM(). If no
				200	such method exists the server sends an error response to the
				201	client. If it exists, it is called with no arguments:
				202
				203	do_SPAM()
				204
				205	Note that the request name is case sensitive (i.e. SPAM and spam
				206	are different requests).
				207
				208	The various request details are stored in instance variables:
				209
				210	- client_address is the client IP address in the form (host,
				211	port);
				212
				213	- command, path and version are the broken-down request line;
				214
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	215	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	216	class) containing the header information;
				217
				218	- rfile is a file object open for reading positioned at the
				219	start of the optional input data part;
				220
				221	- wfile is a file object open for writing.
				222
				223	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				224
				225	The first thing to be written must be the response line. Then
				226	follow 0 or more header lines, then a blank line, and then the
				227	actual data (if any). The meaning of the header lines depends on
				228	the command executed by the server; in most cases, when data is
				229	returned, there should be at least one header line of the form
				230
				231	Content-type: <type>/<subtype>
				232
				233	where <type> and <subtype> should be registered MIME types,
				234	e.g. "text/html" or "text/plain".
				235
				236	"""
				237
				238	# The Python system version, truncated to its first component.
				239	sys_version = "Python/" + sys.version.split()[0]
				240
				241	# The server software version. You may want to override this.
				242	# The format is multiple whitespace-separated strings,
				243	# where each string is of the form name[/version].
				244	server_version = "BaseHTTP/" + __version__
				245
				246	error_message_format = DEFAULT_ERROR_MESSAGE
				247	error_content_type = DEFAULT_ERROR_CONTENT_TYPE
				248
				249	# The default request version. This only affects responses up until
				250	# the point where the request line is parsed, so it mainly decides what
				251	# the client gets back when sending a malformed request line.
				252	# Most web servers default to HTTP 0.9, i.e. don't send a status line.
				253	default_request_version = "HTTP/0.9"
				254
				255	def parse_request(self):
				256	"""Parse a request (internal).
				257
				258	The request should be stored in self.raw_requestline; the results
				259	are in self.command, self.path, self.request_version and
				260	self.headers.
				261
				262	Return True for success, False for failure; on failure, an
				263	error is sent back.
				264
				265	"""
				266	self.command = None # set in case of error on the first line
				267	self.request_version = version = self.default_request_version
				268	self.close_connection = 1
				269	requestline = str(self.raw_requestline, 'iso-8859-1')
				270	if requestline[-2:] == '\r\n':
				271	requestline = requestline[:-2]
				272	elif requestline[-1:] == '\n':
				273	requestline = requestline[:-1]
				274	self.requestline = requestline
				275	words = requestline.split()
				276	if len(words) == 3:
				277	[command, path, version] = words
				278	if version[:5] != 'HTTP/':
				279	self.send_error(400, "Bad request version (%r)" % version)
				280	return False
				281	try:
				282	base_version_number = version.split('/', 1)[1]
				283	version_number = base_version_number.split(".")
				284	# RFC 2145 section 3.1 says there can be only one "." and
				285	# - major and minor numbers MUST be treated as
				286	# separate integers;
				287	# - HTTP/2.4 is a lower version than HTTP/2.13, which in
				288	# turn is lower than HTTP/12.3;
				289	# - Leading zeros MUST be ignored by recipients.
				290	if len(version_number) != 2:
				291	raise ValueError
				292	version_number = int(version_number[0]), int(version_number[1])
				293	except (ValueError, IndexError):
				294	self.send_error(400, "Bad request version (%r)" % version)
				295	return False
				296	if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
				297	self.close_connection = 0
				298	if version_number >= (2, 0):
				299	self.send_error(505,
				300	"Invalid HTTP Version (%s)" % base_version_number)
				301	return False
				302	elif len(words) == 2:
				303	[command, path] = words
				304	self.close_connection = 1
				305	if command != 'GET':
				306	self.send_error(400,
				307	"Bad HTTP/0.9 request type (%r)" % command)
				308	return False
				309	elif not words:
				310	return False
				311	else:
				312	self.send_error(400, "Bad request syntax (%r)" % requestline)
				313	return False
				314	self.command, self.path, self.request_version = command, path, version
				315
				316	# Examine the headers and look for a Connection directive.
Jeremy Hylton	98eb6c2	2009-03-27 18:31:36 +0000	[diff] [blame]	317	self.headers = http.client.parse_headers(self.rfile,
				318	_class=self.MessageClass)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	319
				320	conntype = self.headers.get('Connection', "")
				321	if conntype.lower() == 'close':
				322	self.close_connection = 1
				323	elif (conntype.lower() == 'keep-alive' and
				324	self.protocol_version >= "HTTP/1.1"):
				325	self.close_connection = 0
				326	return True
				327
				328	def handle_one_request(self):
				329	"""Handle a single HTTP request.
				330
				331	You normally don't need to override this method; see the class
				332	__doc__ string for information on how to handle specific HTTP
				333	commands such as GET and POST.
				334
				335	"""
Antoine Pitrou	3022ce1	2010-12-16 17:03:16 +0000	[diff] [blame]	336	self.raw_requestline = self.rfile.readline(65537)
				337	if len(self.raw_requestline) > 65536:
				338	self.requestline = ''
				339	self.request_version = ''
				340	self.command = ''
				341	self.send_error(414)
				342	return
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	343	if not self.raw_requestline:
				344	self.close_connection = 1
				345	return
				346	if not self.parse_request(): # An error code has been sent, just exit
				347	return
				348	mname = 'do_' + self.command
				349	if not hasattr(self, mname):
				350	self.send_error(501, "Unsupported method (%r)" % self.command)
				351	return
				352	method = getattr(self, mname)
				353	method()
				354
				355	def handle(self):
				356	"""Handle multiple requests if necessary."""
				357	self.close_connection = 1
				358
				359	self.handle_one_request()
				360	while not self.close_connection:
				361	self.handle_one_request()
				362
    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the numeric error code and an optional detailed
        message; when the message is omitted, the short entry for the
        code in self.responses is used ('???' for unknown codes).

        The error is logged, then the status line and headers are sent
        (so this must be called before any other output), including a
        'Connection: close' header.  An HTML page built from
        self.error_message_format follows, except for HEAD requests and
        for codes below 200, 204 and 304.
        """
        shortmsg, longmsg = self.responses.get(code, ('???', '???'))
        if message is None:
            message = shortmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        page = self.error_message_format % {
            'code': code,
            'message': _quote_html(message),
            'explain': longmsg,
        }
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command == 'HEAD':
            return
        if code >= 200 and code not in (204, 304):
            self.wfile.write(page.encode('UTF-8', 'replace'))
				393
				394	def send_response(self, code, message=None):
				395	"""Send the response header and log the response code.
				396
				397	Also send two standard headers with the server software
				398	version and the current date.
				399
				400	"""
				401	self.log_request(code)
				402	if message is None:
				403	if code in self.responses:
				404	message = self.responses[code][0]
				405	else:
				406	message = ''
				407	if self.request_version != 'HTTP/0.9':
				408	self.wfile.write(("%s %d %s\r\n" %
				409	(self.protocol_version, code, message)).encode('ASCII', 'strict'))
				410	# print (self.protocol_version, code, message)
				411	self.send_header('Server', self.version_string())
				412	self.send_header('Date', self.date_time_string())
				413
				414	def send_header(self, keyword, value):
				415	"""Send a MIME header."""
				416	if self.request_version != 'HTTP/0.9':
				417	self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))
				418
				419	if keyword.lower() == 'connection':
				420	if value.lower() == 'close':
				421	self.close_connection = 1
				422	elif value.lower() == 'keep-alive':
				423	self.close_connection = 0
				424
				425	def end_headers(self):
				426	"""Send the blank line ending the MIME headers."""
				427	if self.request_version != 'HTTP/0.9':
				428	self.wfile.write(b"\r\n")
				429
				430	def log_request(self, code='-', size='-'):
				431	"""Log an accepted request.
				432
				433	This is called by send_response().
				434
				435	"""
				436
				437	self.log_message('"%s" %s %s',
				438	self.requestline, str(code), str(size))
				439
				440	def log_error(self, format, *args):
				441	"""Log an error.
				442
				443	This is called when a request cannot be fulfilled. By
				444	default it passes the message on to log_message().
				445
				446	Arguments are the same as for log_message().
				447
				448	XXX This should go to the separate error log.
				449
				450	"""
				451
				452	self.log_message(format, *args)
				453
				454	def log_message(self, format, *args):
				455	"""Log an arbitrary message.
				456
				457	This is used by all other logging functions. Override
				458	it if you have specific logging wishes.
				459
				460	The first argument, FORMAT, is a format string for the
				461	message to be logged. If the format string contains
				462	any % escapes requiring parameters, they should be
				463	specified as subsequent arguments (it's just like
				464	printf!).
				465
				466	The client host and current date/time are prefixed to
				467	every message.
				468
				469	"""
				470
				471	sys.stderr.write("%s - - [%s] %s\n" %
				472	(self.address_string(),
				473	self.log_date_time_string(),
				474	format%args))
				475
				476	def version_string(self):
				477	"""Return the server software version string."""
				478	return self.server_version + ' ' + self.sys_version
				479
				480	def date_time_string(self, timestamp=None):
				481	"""Return the current date and time formatted for a message header."""
				482	if timestamp is None:
				483	timestamp = time.time()
				484	year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
				485	s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
				486	self.weekdayname[wd],
				487	day, self.monthname[month], year,
				488	hh, mm, ss)
				489	return s
				490
				491	def log_date_time_string(self):
				492	"""Return the current time formatted for logging."""
				493	now = time.time()
				494	year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
				495	s = "%02d/%3s/%04d %02d:%02d:%02d" % (
				496	day, self.monthname[month], year, hh, mm, ss)
				497	return s
				498
				499	weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
				500
				501	monthname = [None,
				502	'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				503	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				504
				505	def address_string(self):
				506	"""Return the client address formatted for logging.
				507
				508	This version looks up the full hostname using gethostbyaddr(),
				509	and tries to find a name that contains at least one dot.
				510
				511	"""
				512
				513	host, port = self.client_address[:2]
				514	return socket.getfqdn(host)
				515
				516	# Essentially static class variables
				517
				518	# The version of the HTTP protocol we support.
				519	# Set this to HTTP/1.1 to enable automatic keepalive
				520	protocol_version = "HTTP/1.0"
				521
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	522	# MessageClass used to parse headers
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	523	MessageClass = http.client.HTTPMessage
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	524
				525	# Table mapping response codes to messages; entries have the
				526	# form {code: (shortmessage, longmessage)}.
				527	# See RFC 2616.
				528	responses = {
				529	100: ('Continue', 'Request received, please continue'),
				530	101: ('Switching Protocols',
				531	'Switching to new protocol; obey Upgrade header'),
				532
				533	200: ('OK', 'Request fulfilled, document follows'),
				534	201: ('Created', 'Document created, URL follows'),
				535	202: ('Accepted',
				536	'Request accepted, processing continues off-line'),
				537	203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
				538	204: ('No Content', 'Request fulfilled, nothing follows'),
				539	205: ('Reset Content', 'Clear input form for further input.'),
				540	206: ('Partial Content', 'Partial content follows.'),
				541
				542	300: ('Multiple Choices',
				543	'Object has several resources -- see URI list'),
				544	301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
				545	302: ('Found', 'Object moved temporarily -- see URI list'),
				546	303: ('See Other', 'Object moved -- see Method and URL list'),
				547	304: ('Not Modified',
				548	'Document has not changed since given time'),
				549	305: ('Use Proxy',
				550	'You must use proxy specified in Location to access this '
				551	'resource.'),
				552	307: ('Temporary Redirect',
				553	'Object moved temporarily -- see URI list'),
				554
				555	400: ('Bad Request',
				556	'Bad request syntax or unsupported method'),
				557	401: ('Unauthorized',
				558	'No permission -- see authorization schemes'),
				559	402: ('Payment Required',
				560	'No payment -- see charging schemes'),
				561	403: ('Forbidden',
				562	'Request forbidden -- authorization will not help'),
				563	404: ('Not Found', 'Nothing matches the given URI'),
				564	405: ('Method Not Allowed',
Senthil Kumaran	613c61c	2010-02-22 11:02:53 +0000	[diff] [blame]	565	'Specified method is invalid for this resource.'),
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	566	406: ('Not Acceptable', 'URI not available in preferred format.'),
				567	407: ('Proxy Authentication Required', 'You must authenticate with '
				568	'this proxy before proceeding.'),
				569	408: ('Request Timeout', 'Request timed out; try again later.'),
				570	409: ('Conflict', 'Request conflict.'),
				571	410: ('Gone',
				572	'URI no longer exists and has been permanently removed.'),
				573	411: ('Length Required', 'Client must specify Content-Length.'),
				574	412: ('Precondition Failed', 'Precondition in headers is false.'),
				575	413: ('Request Entity Too Large', 'Entity is too large.'),
				576	414: ('Request-URI Too Long', 'URI is too long.'),
				577	415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
				578	416: ('Requested Range Not Satisfiable',
				579	'Cannot satisfy request range.'),
				580	417: ('Expectation Failed',
				581	'Expect condition could not be satisfied.'),
				582
				583	500: ('Internal Server Error', 'Server got itself in trouble'),
				584	501: ('Not Implemented',
				585	'Server does not support this operation'),
				586	502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
				587	503: ('Service Unavailable',
				588	'The server cannot process the request due to a high load'),
				589	504: ('Gateway Timeout',
				590	'The gateway server did not receive a timely response'),
				591	505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
				592	}
				593
				594
				595	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				596
				597	"""Simple HTTP request handler with GET and HEAD commands.
				598
				599	This serves files from the current directory and any of its
				600	subdirectories. The MIME type for files is determined by
				601	calling the .guess_type() method.
				602
				603	The GET and HEAD requests are identical except that the HEAD
				604	request omits the actual contents of the file.
				605
				606	"""
				607
				608	server_version = "SimpleHTTP/" + __version__
				609
				610	def do_GET(self):
				611	"""Serve a GET request."""
				612	f = self.send_head()
				613	if f:
				614	self.copyfile(f, self.wfile)
				615	f.close()
				616
				617	def do_HEAD(self):
				618	"""Serve a HEAD request."""
				619	f = self.send_head()
				620	if f:
				621	f.close()
				622
				623	def send_head(self):
				624	"""Common code for GET and HEAD commands.
				625
				626	This sends the response code and MIME headers.
				627
				628	Return value is either a file object (which has to be copied
				629	to the outputfile by the caller unless the command was HEAD,
				630	and must be closed by the caller under all circumstances), or
				631	None, in which case the caller has nothing further to do.
				632
				633	"""
				634	path = self.translate_path(self.path)
				635	f = None
				636	if os.path.isdir(path):
				637	if not self.path.endswith('/'):
				638	# redirect browser - doing basically what apache does
				639	self.send_response(301)
				640	self.send_header("Location", self.path + "/")
				641	self.end_headers()
				642	return None
				643	for index in "index.html", "index.htm":
				644	index = os.path.join(path, index)
				645	if os.path.exists(index):
				646	path = index
				647	break
				648	else:
				649	return self.list_directory(path)
				650	ctype = self.guess_type(path)
				651	try:
				652	f = open(path, 'rb')
				653	except IOError:
				654	self.send_error(404, "File not found")
				655	return None
				656	self.send_response(200)
				657	self.send_header("Content-type", ctype)
				658	fs = os.fstat(f.fileno())
				659	self.send_header("Content-Length", str(fs[6]))
				660	self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
				661	self.end_headers()
				662	return f
				663
				664	def list_directory(self, path):
				665	"""Helper to produce a directory listing (absent index.html).
				666
				667	Return value is either a file object, or None (indicating an
				668	error). In either case, the headers are sent, making the
				669	interface the same as for send_head().
				670
				671	"""
				672	try:
				673	list = os.listdir(path)
				674	except os.error:
				675	self.send_error(404, "No permission to list directory")
				676	return None
				677	list.sort(key=lambda a: a.lower())
				678	r = []
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	679	displaypath = cgi.escape(urllib.parse.unquote(self.path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	680	r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
				681	r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
				682	r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
				683	r.append("<hr>\n<ul>\n")
				684	for name in list:
				685	fullname = os.path.join(path, name)
				686	displayname = linkname = name
				687	# Append / for directories or @ for symbolic links
				688	if os.path.isdir(fullname):
				689	displayname = name + "/"
				690	linkname = name + "/"
				691	if os.path.islink(fullname):
				692	displayname = name + "@"
				693	# Note: a link to a directory displays with @ and links with /
				694	r.append('<li><a href="%s">%s</a>\n'
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	695	% (urllib.parse.quote(linkname), cgi.escape(displayname)))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	696	r.append("</ul>\n<hr>\n</body>\n</html>\n")
				697	enc = sys.getfilesystemencoding()
				698	encoded = ''.join(r).encode(enc)
				699	f = io.BytesIO()
				700	f.write(encoded)
				701	f.seek(0)
				702	self.send_response(200)
				703	self.send_header("Content-type", "text/html; charset=%s" % enc)
				704	self.send_header("Content-Length", str(len(encoded)))
				705	self.end_headers()
				706	return f
				707
				708	def translate_path(self, path):
				709	"""Translate a /-separated PATH to the local filename syntax.
				710
				711	Components that mean special things to the local file system
				712	(e.g. drive or directory names) are ignored. (XXX They should
				713	probably be diagnosed.)
				714
				715	"""
				716	# abandon query parameters
				717	path = path.split('?',1)[0]
				718	path = path.split('#',1)[0]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	719	path = posixpath.normpath(urllib.parse.unquote(path))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	720	words = path.split('/')
				721	words = filter(None, words)
				722	path = os.getcwd()
				723	for word in words:
				724	drive, word = os.path.splitdrive(word)
				725	head, word = os.path.split(word)
				726	if word in (os.curdir, os.pardir): continue
				727	path = os.path.join(path, word)
				728	return path
				729
				730	def copyfile(self, source, outputfile):
				731	"""Copy all data between two file objects.
				732
				733	The SOURCE argument is a file object open for reading
				734	(or anything with a read() method) and the DESTINATION
				735	argument is a file object open for writing (or
				736	anything with a write() method).
				737
				738	The only reason for overriding this would be to change
				739	the block size or perhaps to replace newlines by CRLF
				740	-- note however that this the default server uses this
				741	to copy binary data as well.
				742
				743	"""
				744	shutil.copyfileobj(source, outputfile)
				745
				746	def guess_type(self, path):
				747	"""Guess the type of a file.
				748
				749	Argument is a PATH (a filename).
				750
				751	Return value is a string of the form type/subtype,
				752	usable for a MIME Content-type header.
				753
				754	The default implementation looks the file's extension
				755	up in the table self.extensions_map, using application/octet-stream
				756	as a default; however it would be permissible (if
				757	slow) to look inside the data to make a better guess.
				758
				759	"""
				760
				761	base, ext = posixpath.splitext(path)
				762	if ext in self.extensions_map:
				763	return self.extensions_map[ext]
				764	ext = ext.lower()
				765	if ext in self.extensions_map:
				766	return self.extensions_map[ext]
				767	else:
				768	return self.extensions_map['']
				769
				770	if not mimetypes.inited:
				771	mimetypes.init() # try to read system mime.types
				772	extensions_map = mimetypes.types_map.copy()
				773	extensions_map.update({
				774	'': 'application/octet-stream', # Default
				775	'.py': 'text/plain',
				776	'.c': 'text/plain',
				777	'.h': 'text/plain',
				778	})
				779
				780
				781	# Utilities for CGIHTTPRequestHandler
				782
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	783	# TODO(gregory.p.smith): Move this into an appropriate library.
				784	def _url_collapse_path_split(path):
				785	"""
				786	Given a URL path, remove extra '/'s and '.' path elements and collapse
				787	any '..' references.
				788
				789	Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
				790
				791	Returns: A tuple of (head, tail) where tail is everything after the final /
				792	and head is everything before it. Head will always start with a '/' and,
				793	if it contains anything else, never have a trailing '/'.
				794
				795	Raises: IndexError if too many '..' occur within the path.
				796	"""
				797	# Similar to os.path.split(os.path.normpath(path)) but specific to URL
				798	# path semantics rather than local operating system semantics.
				799	path_parts = []
				800	for part in path.split('/'):
				801	if part == '.':
				802	path_parts.append('')
				803	else:
				804	path_parts.append(part)
				805	# Filter out blank non trailing parts before consuming the '..'.
				806	path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
				807	if path_parts:
				808	tail_part = path_parts.pop()
				809	else:
				810	tail_part = ''
				811	head_parts = []
				812	for part in path_parts:
				813	if part == '..':
				814	head_parts.pop()
				815	else:
				816	head_parts.append(part)
				817	if tail_part and tail_part == '..':
				818	head_parts.pop()
				819	tail_part = ''
				820	return ('/' + '/'.join(head_parts), tail_part)
				821
				822
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is cached in the module-level name 'nobody'.  Returns -1
    when the pwd module cannot be imported.  When there is no account
    named 'nobody', one more than the largest uid in the password
    database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # Index 2 of a password database entry is the uid.
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
				839
				840
				841	def executable(path):
				842	"""Test for executable file."""
				843	try:
				844	st = os.stat(path)
				845	except os.error:
				846	return False
				847	return st.st_mode & 0o111 != 0
				848
				849
				850	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				851
				852	"""Complete HTTP server with GET, HEAD and POST commands.
				853
				854	GET and HEAD also support running CGI scripts.
				855
				856	The POST command is only implemented for CGI scripts.
				857
				858	"""
				859
				860	# Determine platform specifics
				861	have_fork = hasattr(os, 'fork')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	862
				863	# Make rfile unbuffered -- we need to read one line and then pass
				864	# the rest to a subprocess, so we can't use buffered input.
				865	rbufsize = 0
				866
				867	def do_POST(self):
				868	"""Serve a POST request.
				869
				870	This is only implemented for CGI scripts.
				871
				872	"""
				873
				874	if self.is_cgi():
				875	self.run_cgi()
				876	else:
				877	self.send_error(501, "Can only POST to CGI scripts")
				878
				879	def send_head(self):
				880	"""Version of send_head that support CGI scripts"""
				881	if self.is_cgi():
				882	return self.run_cgi()
				883	else:
				884	return SimpleHTTPRequestHandler.send_head(self)
				885
				886	def is_cgi(self):
				887	"""Test whether self.path corresponds to a CGI script.
				888
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	889	Returns True and updates the cgi_info attribute to the tuple
				890	(dir, rest) if self.path requires running a CGI script.
				891	Returns False otherwise.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	892
Benjamin Peterson	a7deeee	2009-05-08 20:54:42 +0000	[diff] [blame]	893	If any exception is raised, the caller should assume that
				894	self.path was rejected as invalid and act accordingly.
				895
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	896	The default implementation tests whether the normalized url
				897	path begins with one of the strings in self.cgi_directories
				898	(and the next character is a '/' or the end of the string).
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	899
				900	"""
				901
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	902	splitpath = _url_collapse_path_split(self.path)
				903	if splitpath[0] in self.cgi_directories:
				904	self.cgi_info = splitpath
				905	return True
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	906	return False
				907
				908	cgi_directories = ['/cgi-bin', '/htbin']
				909
				910	def is_executable(self, path):
				911	"""Test whether argument path is an executable file."""
				912	return executable(path)
				913
				914	def is_python(self, path):
				915	"""Test whether argument path is a Python script."""
				916	head, tail = os.path.splitext(path)
				917	return tail.lower() in (".py", ".pyw")
				918
				919	def run_cgi(self):
				920	"""Execute a CGI script."""
				921	path = self.path
				922	dir, rest = self.cgi_info
				923
				924	i = path.find('/', len(dir) + 1)
				925	while i >= 0:
				926	nextdir = path[:i]
				927	nextrest = path[i+1:]
				928
				929	scriptdir = self.translate_path(nextdir)
				930	if os.path.isdir(scriptdir):
				931	dir, rest = nextdir, nextrest
				932	i = path.find('/', len(dir) + 1)
				933	else:
				934	break
				935
				936	# find an explicit query string, if present.
				937	i = rest.rfind('?')
				938	if i >= 0:
				939	rest, query = rest[:i], rest[i+1:]
				940	else:
				941	query = ''
				942
				943	# dissect the part after the directory name into a script name &
				944	# a possible additional path, to be stored in PATH_INFO.
				945	i = rest.find('/')
				946	if i >= 0:
				947	script, rest = rest[:i], rest[i:]
				948	else:
				949	script, rest = rest, ''
				950
				951	scriptname = dir + '/' + script
				952	scriptfile = self.translate_path(scriptname)
				953	if not os.path.exists(scriptfile):
				954	self.send_error(404, "No such CGI script (%r)" % scriptname)
				955	return
				956	if not os.path.isfile(scriptfile):
				957	self.send_error(403, "CGI script is not a plain file (%r)" %
				958	scriptname)
				959	return
				960	ispy = self.is_python(scriptname)
				961	if not ispy:
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	962	if not self.is_executable(scriptfile):
				963	self.send_error(403, "CGI script is not executable (%r)" %
				964	scriptname)
				965	return
				966
				967	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				968	# XXX Much of the following could be prepared ahead of time!
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame]	969	env = copy.deepcopy(os.environ)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	970	env['SERVER_SOFTWARE'] = self.version_string()
				971	env['SERVER_NAME'] = self.server.server_name
				972	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				973	env['SERVER_PROTOCOL'] = self.protocol_version
				974	env['SERVER_PORT'] = str(self.server.server_port)
				975	env['REQUEST_METHOD'] = self.command
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	976	uqrest = urllib.parse.unquote(rest)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	977	env['PATH_INFO'] = uqrest
				978	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				979	env['SCRIPT_NAME'] = scriptname
				980	if query:
				981	env['QUERY_STRING'] = query
				982	host = self.address_string()
				983	if host != self.client_address[0]:
				984	env['REMOTE_HOST'] = host
				985	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	986	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	987	if authorization:
				988	authorization = authorization.split()
				989	if len(authorization) == 2:
				990	import base64, binascii
				991	env['AUTH_TYPE'] = authorization[0]
				992	if authorization[0].lower() == "basic":
				993	try:
				994	authorization = authorization[1].encode('ascii')
Georg Brandl	706824f	2009-06-04 09:42:55 +0000	[diff] [blame]	995	authorization = base64.decodebytes(authorization).\
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	996	decode('ascii')
				997	except (binascii.Error, UnicodeError):
				998	pass
				999	else:
				1000	authorization = authorization.split(':')
				1001	if len(authorization) == 2:
				1002	env['REMOTE_USER'] = authorization[0]
				1003	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1004	if self.headers.get('content-type') is None:
				1005	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1006	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1007	env['CONTENT_TYPE'] = self.headers['content-type']
				1008	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1009	if length:
				1010	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1011	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1012	if referer:
				1013	env['HTTP_REFERER'] = referer
				1014	accept = []
				1015	for line in self.headers.getallmatchingheaders('accept'):
				1016	if line[:1] in "\t\n\r ":
				1017	accept.append(line.strip())
				1018	else:
				1019	accept = accept + line[7:].split(',')
				1020	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1021	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1022	if ua:
				1023	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1024	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	caa78fe	2010-08-01 19:07:28 +0000	[diff] [blame]	1025	cookie_str = ', '.join(co)
				1026	if cookie_str:
				1027	env['HTTP_COOKIE'] = cookie_str
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1028	# XXX Other HTTP_* headers
				1029	# Since we're setting the env in the parent, provide empty
				1030	# values to override previously set values
				1031	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				1032	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				1033	env.setdefault(k, "")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1034
				1035	self.send_response(200, "Script output follows")
				1036
				1037	decoded_query = query.replace('+', ' ')
				1038
				1039	if self.have_fork:
				1040	# Unix -- fork as we should
				1041	args = [script]
				1042	if '=' not in decoded_query:
				1043	args.append(decoded_query)
				1044	nobody = nobody_uid()
				1045	self.wfile.flush() # Always flush before forking
				1046	pid = os.fork()
				1047	if pid != 0:
				1048	# Parent
				1049	pid, sts = os.waitpid(pid, 0)
				1050	# throw away additional data [see bug #427345]
				1051	while select.select([self.rfile], [], [], 0)[0]:
				1052	if not self.rfile.read(1):
				1053	break
				1054	if sts:
				1055	self.log_error("CGI script exit status %#x", sts)
				1056	return
				1057	# Child
				1058	try:
				1059	try:
				1060	os.setuid(nobody)
				1061	except os.error:
				1062	pass
				1063	os.dup2(self.rfile.fileno(), 0)
				1064	os.dup2(self.wfile.fileno(), 1)
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame]	1065	os.execve(scriptfile, args, env)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1066	except:
				1067	self.server.handle_error(self.request, self.client_address)
				1068	os._exit(127)
				1069
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1070	else:
				1071	# Non-Unix -- use subprocess
				1072	import subprocess
Senthil Kumaran	ca5130c	2009-11-11 04:21:22 +0000	[diff] [blame]	1073	cmdline = [scriptfile]
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1074	if self.is_python(scriptfile):
				1075	interp = sys.executable
				1076	if interp.lower().endswith("w.exe"):
				1077	# On Windows, use python.exe, not pythonw.exe
				1078	interp = interp[:-5] + interp[-4:]
Senthil Kumaran	ca5130c	2009-11-11 04:21:22 +0000	[diff] [blame]	1079	cmdline = [interp, '-u'] + cmdline
				1080	if '=' not in query:
				1081	cmdline.append(query)
				1082	self.log_message("command: %s", subprocess.list2cmdline(cmdline))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1083	try:
				1084	nbytes = int(length)
				1085	except (TypeError, ValueError):
				1086	nbytes = 0
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1087	p = subprocess.Popen(cmdline,
				1088	stdin=subprocess.PIPE,
				1089	stdout=subprocess.PIPE,
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame]	1090	stderr=subprocess.PIPE,
				1091	env = env
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1092	)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1093	if self.command.lower() == "post" and nbytes > 0:
				1094	data = self.rfile.read(nbytes)
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1095	else:
				1096	data = None
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1097	# throw away additional data [see bug #427345]
				1098	while select.select([self.rfile._sock], [], [], 0)[0]:
				1099	if not self.rfile._sock.recv(1):
				1100	break
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1101	stdout, stderr = p.communicate(data)
				1102	self.wfile.write(stdout)
				1103	if stderr:
				1104	self.log_error('%s', stderr)
Brian Curtin	938ece7	2010-11-05 15:08:19 +0000	[diff] [blame]	1105	p.stderr.close()
				1106	p.stdout.close()
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1107	status = p.returncode
				1108	if status:
				1109	self.log_error("CGI script exit status %#x", status)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1110	else:
				1111	self.log_message("CGI script exited OK")
				1112
				1113
				1114	def test(HandlerClass = BaseHTTPRequestHandler,
				1115	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1116	"""Test the HTTP request handler class.
				1117
				1118	This runs an HTTP server on port 8000 (or the first command line
				1119	argument).
				1120
				1121	"""
				1122
				1123	if sys.argv[1:]:
				1124	port = int(sys.argv[1])
				1125	else:
				1126	port = 8000
				1127	server_address = ('', port)
				1128
				1129	HandlerClass.protocol_version = protocol
				1130	httpd = ServerClass(server_address, HandlerClass)
				1131
				1132	sa = httpd.socket.getsockname()
				1133	print("Serving HTTP on", sa[0], "port", sa[1], "...")
Alexandre Vassalotti	b5292a2	2009-04-03 07:16:55 +0000	[diff] [blame]	1134	try:
				1135	httpd.serve_forever()
				1136	except KeyboardInterrupt:
				1137	print("\nKeyboard interrupt received, exiting.")
				1138	httpd.server_close()
				1139	sys.exit(0)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1140
				1141	if __name__ == '__main__':
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1142	test(HandlerClass=SimpleHTTPRequestHandler)