Blame - Lib/http/server.py - platform/external/python/cpython3

blob: a4acff0dfa18d245beda9cbd03b0e1bf82968f70 [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (which also handles POST).
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	16	subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	17
				18	In all cases, the implementation is intentionally naive -- all
				19	requests are executed synchronously.
				20
				21	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				22	-- it may execute arbitrary Python code or external programs.
				23
				24	Note that status code 200 is sent prior to execution of a CGI script, so
				25	scripts cannot send other status codes such as 302 (redirect).
				26
				27	XXX To do:
				28
				29	- log requests even later (to capture byte count)
				30	- log user-agent header and other interesting goodies
				31	- send error log to separate file
				32	"""
				33
				34
				35	# See also:
				36	#
				37	# HTTP Working Group T. Berners-Lee
				38	# INTERNET-DRAFT R. T. Fielding
				39	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				40	# Expires September 8, 1995 March 8, 1995
				41	#
				42	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				43	#
				44	# and
				45	#
				46	# Network Working Group R. Fielding
				47	# Request for Comments: 2616 et al
				48	# Obsoletes: 2068 June 1999
				49	# Category: Standards Track
				50	#
				51	# URL: http://www.faqs.org/rfcs/rfc2616.html
				52
				53	# Log files
				54	# ---------
				55	#
				56	# Here's a quote from the NCSA httpd docs about log file format.
				57	#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              not including the HTTP/1.0 header, - if not available
# |
# | You can determine the name of the file accessed through request.
				79	#
				80	# (Actually, the latter is only true if you know the server configuration
				81	# at the time the request was made!)
				82
				83	__version__ = "0.6"
				84
				85	__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				86
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	87	import cgi
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	88	import email.message
				89	import email.parser
Jeremy Hylton	914ab45	2009-03-27 17:16:06 +0000	[diff] [blame]	90	import http.client
				91	import io
				92	import mimetypes
				93	import os
				94	import posixpath
				95	import select
				96	import shutil
				97	import socket # For gethostbyaddr()
				98	import socketserver
				99	import sys
				100	import time
				101	import urllib.parse
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame^]	102	import copy
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	103
# Default error message template.
# BaseHTTPRequestHandler.send_error() %-formats this with a mapping that
# provides the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent together with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				118
				119	def _quote_html(html):
				120	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				121
				122	class HTTPServer(socketserver.TCPServer):
				123
				124	allow_reuse_address = 1 # Seems to make sense in testing environment
				125
				126	def server_bind(self):
				127	"""Override server_bind to store the server name."""
				128	socketserver.TCPServer.server_bind(self)
				129	host, port = self.socket.getsockname()[:2]
				130	self.server_name = socket.getfqdn(host)
				131	self.server_port = port
				132
				133
				134	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				135
				136	"""HTTP request handler base class.
				137
				138	The following explanation of HTTP serves to guide you through the
				139	code as well as to expose any misunderstandings I may have about
				140	HTTP (so you don't need to read the code to figure out I'm wrong
				141	:-).
				142
				143	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				144	top of a reliable stream transport (e.g. TCP/IP). The protocol
				145	recognizes three parts to a request:
				146
				147	1. One line identifying the request type and path
				148	2. An optional set of RFC-822-style headers
				149	3. An optional data part
				150
				151	The headers and data are separated by a blank line.
				152
				153	The first line of the request has the form
				154
				155	<command> <path> <version>
				156
				157	where <command> is a (case-sensitive) keyword such as GET or POST,
				158	<path> is a string containing path information for the request,
				159	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				160	<path> is encoded using the URL encoding scheme (using %xx to signify
				161	the ASCII character with hex code xx).
				162
				163	The specification specifies that lines are separated by CRLF but
				164	for compatibility with the widest range of clients recommends
				165	servers also handle LF. Similarly, whitespace in the request line
				166	is treated sensibly (allowing multiple spaces between components
				167	and allowing trailing whitespace).
				168
				169	Similarly, for output, lines ought to be separated by CRLF pairs
				170	but most clients grok LF characters just fine.
				171
				172	If the first line of the request has the form
				173
				174	<command> <path>
				175
				176	(i.e. <version> is left out) then this is assumed to be an HTTP
				177	0.9 request; this form has no optional headers and data part and
				178	the reply consists of just the data.
				179
				180	The reply form of the HTTP 1.x protocol again has three parts:
				181
				182	1. One line giving the response code
				183	2. An optional set of RFC-822-style headers
				184	3. The data
				185
				186	Again, the headers and data are separated by a blank line.
				187
				188	The response code line has the form
				189
				190	<version> <responsecode> <responsestring>
				191
				192	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				193	<responsecode> is a 3-digit response code indicating success or
				194	failure of the request, and <responsestring> is an optional
				195	human-readable string explaining what the response code means.
				196
				197	This server parses the request and the headers, and then calls a
				198	function specific to the request type (<command>). Specifically,
				199	a request SPAM will be handled by a method do_SPAM(). If no
				200	such method exists the server sends an error response to the
				201	client. If it exists, it is called with no arguments:
				202
				203	do_SPAM()
				204
				205	Note that the request name is case sensitive (i.e. SPAM and spam
				206	are different requests).
				207
				208	The various request details are stored in instance variables:
				209
				210	- client_address is the client IP address in the form (host,
				211	port);
				212
				213	- command, path and version are the broken-down request line;
				214
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	215	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	216	class) containing the header information;
				217
				218	- rfile is a file object open for reading positioned at the
				219	start of the optional input data part;
				220
				221	- wfile is a file object open for writing.
				222
				223	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				224
				225	The first thing to be written must be the response line. Then
				226	follow 0 or more header lines, then a blank line, and then the
				227	actual data (if any). The meaning of the header lines depends on
				228	the command executed by the server; in most cases, when data is
				229	returned, there should be at least one header line of the form
				230
				231	Content-type: <type>/<subtype>
				232
				233	where <type> and <subtype> should be registered MIME types,
				234	e.g. "text/html" or "text/plain".
				235
				236	"""
				237
    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version. You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type that send_error() uses for the error page.
    # The template is %-formatted with the keys 'code', 'message' and
    # 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version. This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
				254
    def parse_request(self):
        """Parse the request line and the headers (internal).

        Input is self.raw_requestline.  On success self.command, self.path,
        self.request_version, self.requestline and self.headers are filled
        in and self.close_connection reflects the negotiated persistence.

        Returns True when the request was parsed and False otherwise.  An
        error response is sent for every failure except a blank request
        line, which is rejected silently.
        """
        # Values that stay in effect when the first line cannot be parsed.
        self.command = None
        self.request_version = version = self.default_request_version
        self.close_connection = 1

        line = str(self.raw_requestline, 'iso-8859-1')
        # Remove exactly one line terminator: CRLF if present, else bare LF.
        if line.endswith('\r\n'):
            line = line[:-2]
        elif line.endswith('\n'):
            line = line[:-1]
        self.requestline = line

        fields = line.split()
        if not fields:
            return False
        if len(fields) not in (2, 3):
            self.send_error(400, "Bad request syntax (%r)" % line)
            return False

        if len(fields) == 3:
            command, path, version = fields
            if not version.startswith('HTTP/'):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                pieces = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(pieces) != 2:
                    raise ValueError
                version_number = int(pieces[0]), int(pieces[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        else:
            # Two words: an HTTP/0.9 request, which only knows GET.
            command, path = fields
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False

        self.command = command
        self.path = path
        self.request_version = version

        # Examine the headers and look for a Connection directive.
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)

        connection = self.headers.get('Connection', "").lower()
        if connection == 'close':
            self.close_connection = 1
        elif connection == 'keep-alive' and self.protocol_version >= "HTTP/1.1":
            self.close_connection = 0
        return True
				327
    def handle_one_request(self):
        """Handle a single HTTP request.

        Reads the request line, parses the request and dispatches to the
        ``do_<COMMAND>`` method.  A request line longer than 65536 bytes is
        answered with 414 instead of being buffered without limit, and a
        command with no matching method is answered with 501.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.
        """
        max_line = 65536
        # Read one byte more than the limit so an over-long line is detectable.
        self.raw_requestline = self.rfile.readline(max_line + 1)
        if len(self.raw_requestline) > max_line:
            # send_error() and the methods it calls read these attributes,
            # which parse_request() would normally have set.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            # The rest of the oversized line is still unread, so the
            # connection cannot be reused.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            # End of stream: the client closed the connection.
            self.close_connection = 1
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        method = getattr(self, 'do_' + self.command, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()
				348
    def handle(self):
        """Serve requests on this connection until it is marked for closing.

        At least one request is always handled; further requests are
        handled for as long as self.close_connection stays false.
        """
        self.close_connection = 1

        while True:
            self.handle_one_request()
            if self.close_connection:
                break
				356
    def send_error(self, code, message=None):
        """Log an error and send the matching error response.

        *code* is the HTTP status code.  *message* is the detailed message
        used in the status line and the page; it defaults to the short
        message registered for *code* in self.responses ('???' if the code
        is unknown).

        Must be called before any output has been generated: it writes the
        status line, the headers (including 'Connection: close') and, unless
        the request was HEAD or the status forbids a body, an HTML page
        explaining the error.
        """
        short_text, long_text = self.responses.get(code, ('???', '???'))
        if message is None:
            message = short_text
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        page_fields = {
            'code': code,
            'message': _quote_html(message),
            'explain': long_text,
        }
        page = self.error_message_format % page_fields
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests, 1xx, 204 and 304 responses.
        if self.command == 'HEAD' or code < 200 or code in (204, 304):
            return
        self.wfile.write(page.encode('UTF-8', 'replace'))
				387
    def send_response(self, code, message=None):
        """Log the request, then send the status line and standard headers.

        *message* defaults to the short message registered for *code* in
        self.responses, or to the empty string for an unknown code.  The
        status line is skipped for HTTP/0.9 requests.  The Server and Date
        headers are sent through send_header().

        The status line is encoded as Latin-1: messages built from the
        request line (which is decoded as ISO-8859-1) may contain non-ASCII
        characters, and a strict ASCII encoding would raise
        UnicodeEncodeError instead of answering the client.
        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
            self.wfile.write(status_line.encode('latin-1', 'strict'))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())
				407
    def send_header(self, keyword, value):
        """Send a MIME header and track Connection directives.

        Nothing is written for HTTP/0.9 requests.  The header line is
        encoded as Latin-1 so that values derived from the request (for
        example a Location built from the ISO-8859-1-decoded path) are sent
        back unchanged instead of raising UnicodeEncodeError.

        A 'Connection' header (name and value matched case-insensitively)
        also updates self.close_connection: 'close' sets it, 'keep-alive'
        clears it.
        """
        if self.request_version != 'HTTP/0.9':
            header_line = "%s: %s\r\n" % (keyword, value)
            self.wfile.write(header_line.encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            directive = value.lower()
            if directive == 'close':
                self.close_connection = 1
            elif directive == 'keep-alive':
                self.close_connection = 0
				418
    def end_headers(self):
        """Write the blank line that terminates the header section.

        HTTP/0.9 responses carry no headers, so nothing is written for them.
        """
        if self.request_version == 'HTTP/0.9':
            return
        self.wfile.write(b"\r\n")
				423
    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        Writes the quoted request line followed by the status *code* and
        the response *size* through log_message(); either value defaults
        to '-'.  This is called by send_response().
        """
        self.log_message('"%s" %s %s', self.requestline, str(code), str(size))
				433
    def log_error(self, format, *args):
        """Log an error.

        Called when a request cannot be fulfilled.  The arguments have the
        same meaning as for log_message(), to which they are forwarded
        unchanged by default.

        XXX This should go to the separate error log.
        """
        self.log_message(format, *args)
				447
    def log_message(self, format, *args):
        """Log an arbitrary message to sys.stderr.

        Every other logging method ends up here; override it if you have
        specific logging wishes.

        *format* is a printf-style format string and *args* are the values
        for its % escapes.  The written line is prefixed with the client
        address (address_string()) and the current date and time
        (log_date_time_string()).
        """
        client = self.address_string()
        stamp = self.log_date_time_string()
        text = format % args
        sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
				469
    def version_string(self):
        """Return the server software version string.

        It is server_version and sys_version separated by one space.
        """
        return ' '.join((self.server_version, self.sys_version))
				473
    def date_time_string(self, timestamp=None):
        """Return a date and time formatted for a message header.

        *timestamp* is in seconds since the epoch and defaults to the
        current time.  The value is rendered in GMT, e.g.
        'Thu, 01 Jan 1970 00:00:00 GMT'.
        """
        if timestamp is None:
            timestamp = time.time()
        utc = time.gmtime(timestamp)
        return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[utc.tm_wday],
            utc.tm_mday, self.monthname[utc.tm_mon], utc.tm_year,
            utc.tm_hour, utc.tm_min, utc.tm_sec)
				484
    def log_date_time_string(self):
        """Return the current local time formatted for logging.

        The format is 'DD/Mon/YYYY hh:mm:ss'.
        """
        local = time.localtime(time.time())
        return "%02d/%3s/%04d %02d:%02d:%02d" % (
            local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
            local.tm_hour, local.tm_min, local.tm_sec)
				492
    # Day names, indexed by the weekday field of the time tuple that
    # date_time_string() gets from time.gmtime().
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Month names, indexed by the month field of the time tuple; index 0
    # is only a placeholder so that the names start at index 1.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				498
    def address_string(self):
        """Return the client address formatted for logging.

        The host part of self.client_address is passed through
        socket.getfqdn(); the port is not part of the result.
        """
        host, _port = self.client_address[:2]
        return socket.getfqdn(host)
				509
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    # (parse_request() passes it to http.client.parse_headers()).
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    # send_response() uses the short message as the default reason phrase;
    # send_error() also puts the long message on the error page.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
				587
				588
				589	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				590
				591	"""Simple HTTP request handler with GET and HEAD commands.
				592
				593	This serves files from the current directory and any of its
				594	subdirectories. The MIME type for files is determined by
				595	calling the .guess_type() method.
				596
				597	The GET and HEAD requests are identical except that the HEAD
				598	request omits the actual contents of the file.
				599
				600	"""
				601
    # Overrides the base class's "BaseHTTP/..." identifier; version_string()
    # combines it with sys_version.
    server_version = "SimpleHTTP/" + __version__
				603
    def do_GET(self):
        """Serve a GET request.

        send_head() sends the status line and headers and returns the file
        object to transmit, or None when there is nothing more to do.  The
        file object is closed even if copying it to the client fails (for
        example because the client disconnected), so it is not leaked.
        """
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()
				610
    def do_HEAD(self):
        """Serve a HEAD request.

        Only the status line and headers produced by send_head() are sent;
        the file object it may return is closed without being transmitted.
        """
        source = self.send_head()
        if not source:
            return
        source.close()
				616
    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        A directory requested without a trailing '/' is answered with a
        301 redirect; a directory containing index.html or index.htm is
        served through that file; any other directory is delegated to
        list_directory().  A file that cannot be opened yields a 404.
        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file object on failure, so it
            # has to be closed here before the error propagates.
            f.close()
            raise
        return f
				657
    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        *path* is the local directory to list.  Entries are sorted
        case-insensitively; directories are shown with a trailing '/' and
        symbolic links with a trailing '@'.

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        # html.escape(..., quote=False) produces the same output as the
        # removed cgi.escape(): only '&', '<' and '>' are replaced.
        import html

        try:
            names = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        names.sort(key=lambda a: a.lower())
        displaypath = html.escape(urllib.parse.unquote(self.path), quote=False)
        r = [
            '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">',
            "<html>\n<title>Directory listing for %s</title>\n" % displaypath,
            "<body>\n<h2>Directory listing for %s</h2>\n" % displaypath,
            "<hr>\n<ul>\n",
        ]
        for name in names:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
                     % (urllib.parse.quote(linkname),
                        html.escape(displayname, quote=False)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f
				701
    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        The query string and fragment are dropped, the path is unquoted
        and normalized, and its components are joined below the current
        working directory.  Components that mean special things to the
        local file system (e.g. drive or directory names) are ignored.
        (XXX They should probably be diagnosed.)
        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        local_path = os.getcwd()
        for component in path.split('/'):
            if not component:
                continue
            # Keep only the final name: strip any drive and directory part.
            component = os.path.splitdrive(component)[1]
            component = os.path.split(component)[1]
            if component in (os.curdir, os.pardir):
                continue
            local_path = os.path.join(local_path, component)
        return local_path
				723
    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)
				739
    def guess_type(self, path):
        """Guess the MIME type of the file named by *path*.

        The result is a string of the form type/subtype, usable for a
        Content-type header.  The file's extension is looked up in
        self.extensions_map, first as written and then lower-cased; if
        neither is known the entry for '' is returned.  It would be
        permissible (if slow) to look inside the data to make a better
        guess.
        """
        extension = posixpath.splitext(path)[1]
        table = self.extensions_map
        for candidate in (extension, extension.lower()):
            if candidate in table:
                return table[candidate]
        return table['']
				763
    # Build the extension -> MIME type table used by guess_type() when the
    # class body is executed.
    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    # Local overrides; the '' entry is what guess_type() returns for an
    # extension that is not in the table.
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
				773
				774
				775	# Utilities for CGIHTTPRequestHandler
				776
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	777	# TODO(gregory.p.smith): Move this into an appropriate library.
				778	def _url_collapse_path_split(path):
				779	"""
				780	Given a URL path, remove extra '/'s and '.' path elements and collapse
				781	any '..' references.
				782
				783	Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
				784
				785	Returns: A tuple of (head, tail) where tail is everything after the final /
				786	and head is everything before it. Head will always start with a '/' and,
				787	if it contains anything else, never have a trailing '/'.
				788
				789	Raises: IndexError if too many '..' occur within the path.
				790	"""
				791	# Similar to os.path.split(os.path.normpath(path)) but specific to URL
				792	# path semantics rather than local operating system semantics.
				793	path_parts = []
				794	for part in path.split('/'):
				795	if part == '.':
				796	path_parts.append('')
				797	else:
				798	path_parts.append(part)
				799	# Filter out blank non trailing parts before consuming the '..'.
				800	path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
				801	if path_parts:
				802	tail_part = path_parts.pop()
				803	else:
				804	tail_part = ''
				805	head_parts = []
				806	for part in path_parts:
				807	if part == '..':
				808	head_parts.pop()
				809	else:
				810	head_parts.append(part)
				811	if tail_part and tail_part == '..':
				812	head_parts.pop()
				813	tail_part = ''
				814	return ('/' + '/'.join(head_parts), tail_part)
				815
				816
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the uid
    of the 'nobody' account is returned, or, when no such account exists,
    one more than the largest uid reported by pwd.getpwall().  The value
    is stored in the module-level ``nobody`` for later calls.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
				833
				834
				835	def executable(path):
				836	"""Test for executable file."""
				837	try:
				838	st = os.stat(path)
				839	except os.error:
				840	return False
				841	return st.st_mode & 0o111 != 0
				842
				843
				844	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				845
				846	"""Complete HTTP server with GET, HEAD and POST commands.
				847
				848	GET and HEAD also support running CGI scripts.
				849
				850	The POST command is only implemented for CGI scripts.
				851
				852	"""
				853
				854	# Determine platform specifics
				855	have_fork = hasattr(os, 'fork')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	856
				857	# Make rfile unbuffered -- we need to read one line and then pass
				858	# the rest to a subprocess, so we can't use buffered input.
				859	rbufsize = 0
				860
				861	def do_POST(self):
				862	"""Serve a POST request.
				863
				864	This is only implemented for CGI scripts.
				865
				866	"""
				867
				868	if self.is_cgi():
				869	self.run_cgi()
				870	else:
				871	self.send_error(501, "Can only POST to CGI scripts")
				872
				873	def send_head(self):
				874	"""Version of send_head that support CGI scripts"""
				875	if self.is_cgi():
				876	return self.run_cgi()
				877	else:
				878	return SimpleHTTPRequestHandler.send_head(self)
				879
				880	def is_cgi(self):
				881	"""Test whether self.path corresponds to a CGI script.
				882
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	883	Returns True and updates the cgi_info attribute to the tuple
				884	(dir, rest) if self.path requires running a CGI script.
				885	Returns False otherwise.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	886
Benjamin Peterson	a7deeee	2009-05-08 20:54:42 +0000	[diff] [blame]	887	If any exception is raised, the caller should assume that
				888	self.path was rejected as invalid and act accordingly.
				889
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	890	The default implementation tests whether the normalized url
				891	path begins with one of the strings in self.cgi_directories
				892	(and the next character is a '/' or the end of the string).
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	893
				894	"""
				895
Benjamin Peterson	ad71f0f	2009-04-11 20:12:10 +0000	[diff] [blame]	896	splitpath = _url_collapse_path_split(self.path)
				897	if splitpath[0] in self.cgi_directories:
				898	self.cgi_info = splitpath
				899	return True
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	900	return False
				901
				902	cgi_directories = ['/cgi-bin', '/htbin']
				903
				904	def is_executable(self, path):
				905	"""Test whether argument path is an executable file."""
				906	return executable(path)
				907
				908	def is_python(self, path):
				909	"""Test whether argument path is a Python script."""
				910	head, tail = os.path.splitext(path)
				911	return tail.lower() in (".py", ".pyw")
				912
				913	def run_cgi(self):
				914	"""Execute a CGI script."""
				915	path = self.path
				916	dir, rest = self.cgi_info
				917
				918	i = path.find('/', len(dir) + 1)
				919	while i >= 0:
				920	nextdir = path[:i]
				921	nextrest = path[i+1:]
				922
				923	scriptdir = self.translate_path(nextdir)
				924	if os.path.isdir(scriptdir):
				925	dir, rest = nextdir, nextrest
				926	i = path.find('/', len(dir) + 1)
				927	else:
				928	break
				929
				930	# find an explicit query string, if present.
				931	i = rest.rfind('?')
				932	if i >= 0:
				933	rest, query = rest[:i], rest[i+1:]
				934	else:
				935	query = ''
				936
				937	# dissect the part after the directory name into a script name &
				938	# a possible additional path, to be stored in PATH_INFO.
				939	i = rest.find('/')
				940	if i >= 0:
				941	script, rest = rest[:i], rest[i:]
				942	else:
				943	script, rest = rest, ''
				944
				945	scriptname = dir + '/' + script
				946	scriptfile = self.translate_path(scriptname)
				947	if not os.path.exists(scriptfile):
				948	self.send_error(404, "No such CGI script (%r)" % scriptname)
				949	return
				950	if not os.path.isfile(scriptfile):
				951	self.send_error(403, "CGI script is not a plain file (%r)" %
				952	scriptname)
				953	return
				954	ispy = self.is_python(scriptname)
				955	if not ispy:
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	956	if not self.is_executable(scriptfile):
				957	self.send_error(403, "CGI script is not executable (%r)" %
				958	scriptname)
				959	return
				960
				961	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				962	# XXX Much of the following could be prepared ahead of time!
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame^]	963	env = copy.deepcopy(os.environ)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	964	env['SERVER_SOFTWARE'] = self.version_string()
				965	env['SERVER_NAME'] = self.server.server_name
				966	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				967	env['SERVER_PROTOCOL'] = self.protocol_version
				968	env['SERVER_PORT'] = str(self.server.server_port)
				969	env['REQUEST_METHOD'] = self.command
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	970	uqrest = urllib.parse.unquote(rest)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	971	env['PATH_INFO'] = uqrest
				972	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				973	env['SCRIPT_NAME'] = scriptname
				974	if query:
				975	env['QUERY_STRING'] = query
				976	host = self.address_string()
				977	if host != self.client_address[0]:
				978	env['REMOTE_HOST'] = host
				979	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	980	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	981	if authorization:
				982	authorization = authorization.split()
				983	if len(authorization) == 2:
				984	import base64, binascii
				985	env['AUTH_TYPE'] = authorization[0]
				986	if authorization[0].lower() == "basic":
				987	try:
				988	authorization = authorization[1].encode('ascii')
Georg Brandl	706824f	2009-06-04 09:42:55 +0000	[diff] [blame]	989	authorization = base64.decodebytes(authorization).\
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	990	decode('ascii')
				991	except (binascii.Error, UnicodeError):
				992	pass
				993	else:
				994	authorization = authorization.split(':')
				995	if len(authorization) == 2:
				996	env['REMOTE_USER'] = authorization[0]
				997	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	998	if self.headers.get('content-type') is None:
				999	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1000	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1001	env['CONTENT_TYPE'] = self.headers['content-type']
				1002	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1003	if length:
				1004	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1005	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1006	if referer:
				1007	env['HTTP_REFERER'] = referer
				1008	accept = []
				1009	for line in self.headers.getallmatchingheaders('accept'):
				1010	if line[:1] in "\t\n\r ":
				1011	accept.append(line.strip())
				1012	else:
				1013	accept = accept + line[7:].split(',')
				1014	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1015	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1016	if ua:
				1017	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	1018	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	caa78fe	2010-08-01 19:07:28 +0000	[diff] [blame]	1019	cookie_str = ', '.join(co)
				1020	if cookie_str:
				1021	env['HTTP_COOKIE'] = cookie_str
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1022	# XXX Other HTTP_* headers
				1023	# Since we're setting the env in the parent, provide empty
				1024	# values to override previously set values
				1025	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				1026	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				1027	env.setdefault(k, "")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1028
				1029	self.send_response(200, "Script output follows")
				1030
				1031	decoded_query = query.replace('+', ' ')
				1032
				1033	if self.have_fork:
				1034	# Unix -- fork as we should
				1035	args = [script]
				1036	if '=' not in decoded_query:
				1037	args.append(decoded_query)
				1038	nobody = nobody_uid()
				1039	self.wfile.flush() # Always flush before forking
				1040	pid = os.fork()
				1041	if pid != 0:
				1042	# Parent
				1043	pid, sts = os.waitpid(pid, 0)
				1044	# throw away additional data [see bug #427345]
				1045	while select.select([self.rfile], [], [], 0)[0]:
				1046	if not self.rfile.read(1):
				1047	break
				1048	if sts:
				1049	self.log_error("CGI script exit status %#x", sts)
				1050	return
				1051	# Child
				1052	try:
				1053	try:
				1054	os.setuid(nobody)
				1055	except os.error:
				1056	pass
				1057	os.dup2(self.rfile.fileno(), 0)
				1058	os.dup2(self.wfile.fileno(), 1)
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame^]	1059	os.execve(scriptfile, args, env)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1060	except:
				1061	self.server.handle_error(self.request, self.client_address)
				1062	os._exit(127)
				1063
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1064	else:
				1065	# Non-Unix -- use subprocess
				1066	import subprocess
Senthil Kumaran	ca5130c	2009-11-11 04:21:22 +0000	[diff] [blame]	1067	cmdline = [scriptfile]
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1068	if self.is_python(scriptfile):
				1069	interp = sys.executable
				1070	if interp.lower().endswith("w.exe"):
				1071	# On Windows, use python.exe, not pythonw.exe
				1072	interp = interp[:-5] + interp[-4:]
Senthil Kumaran	ca5130c	2009-11-11 04:21:22 +0000	[diff] [blame]	1073	cmdline = [interp, '-u'] + cmdline
				1074	if '=' not in query:
				1075	cmdline.append(query)
				1076	self.log_message("command: %s", subprocess.list2cmdline(cmdline))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1077	try:
				1078	nbytes = int(length)
				1079	except (TypeError, ValueError):
				1080	nbytes = 0
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1081	p = subprocess.Popen(cmdline,
				1082	stdin=subprocess.PIPE,
				1083	stdout=subprocess.PIPE,
Senthil Kumaran	5e8826c	2010-10-03 18:04:52 +0000	[diff] [blame^]	1084	stderr=subprocess.PIPE,
				1085	env = env
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1086	)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1087	if self.command.lower() == "post" and nbytes > 0:
				1088	data = self.rfile.read(nbytes)
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1089	else:
				1090	data = None
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1091	# throw away additional data [see bug #427345]
				1092	while select.select([self.rfile._sock], [], [], 0)[0]:
				1093	if not self.rfile._sock.recv(1):
				1094	break
Amaury Forgeot d'Arc	cb0d2d7	2008-06-18 22:19:22 +0000	[diff] [blame]	1095	stdout, stderr = p.communicate(data)
				1096	self.wfile.write(stdout)
				1097	if stderr:
				1098	self.log_error('%s', stderr)
				1099	status = p.returncode
				1100	if status:
				1101	self.log_error("CGI script exit status %#x", status)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1102	else:
				1103	self.log_message("CGI script exited OK")
				1104
				1105
				1106	def test(HandlerClass = BaseHTTPRequestHandler,
				1107	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1108	"""Test the HTTP request handler class.
				1109
				1110	This runs an HTTP server on port 8000 (or the first command line
				1111	argument).
				1112
				1113	"""
				1114
				1115	if sys.argv[1:]:
				1116	port = int(sys.argv[1])
				1117	else:
				1118	port = 8000
				1119	server_address = ('', port)
				1120
				1121	HandlerClass.protocol_version = protocol
				1122	httpd = ServerClass(server_address, HandlerClass)
				1123
				1124	sa = httpd.socket.getsockname()
				1125	print("Serving HTTP on", sa[0], "port", sa[1], "...")
Alexandre Vassalotti	b5292a2	2009-04-03 07:16:55 +0000	[diff] [blame]	1126	try:
				1127	httpd.serve_forever()
				1128	except KeyboardInterrupt:
				1129	print("\nKeyboard interrupt received, exiting.")
				1130	httpd.server_close()
				1131	sys.exit(0)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1132
				1133	if __name__ == '__main__':
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1134	test(HandlerClass=SimpleHTTPRequestHandler)