Blame - Lib/http/server.py - platform/external/python/cpython3

blob: 35ade6c015245f96ca4462f477cc6e59972a1621 [file] [log] [blame]

Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	1	"""HTTP server classes.
				2
				3	Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
				4	SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
				5	and CGIHTTPRequestHandler for CGI scripts.
				6
				7	It does, however, optionally implement HTTP/1.1 persistent connections,
				8	as of version 0.3.
				9
				10	Notes on CGIHTTPRequestHandler
				11	------------------------------
				12
				13	This class implements GET and POST requests to cgi-bin scripts.
				14
				15	If the os.fork() function is not present (e.g. on Windows),
				16	os.popen2() is used as a fallback, with slightly altered semantics; if
				17	that function is not present either (e.g. on Macintosh), only Python
				18	scripts are supported, and they are executed by the current process.
				19
				20	In all cases, the implementation is intentionally naive -- all
				21	requests are executed synchronously.
				22
				23	SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
				24	-- it may execute arbitrary Python code or external programs.
				25
				26	Note that status code 200 is sent prior to execution of a CGI script, so
				27	scripts cannot send other status codes such as 302 (redirect).
				28
				29	XXX To do:
				30
				31	- log requests even later (to capture byte count)
				32	- log user-agent header and other interesting goodies
				33	- send error log to separate file
				34	"""
				35
				36
				37	# See also:
				38	#
				39	# HTTP Working Group T. Berners-Lee
				40	# INTERNET-DRAFT R. T. Fielding
				41	# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
				42	# Expires September 8, 1995 March 8, 1995
				43	#
				44	# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
				45	#
				46	# and
				47	#
				48	# Network Working Group R. Fielding
				49	# Request for Comments: 2616 et al
				50	# Obsoletes: 2068 June 1999
				51	# Category: Standards Track
				52	#
				53	# URL: http://www.faqs.org/rfcs/rfc2616.html
				54
				55	# Log files
				56	# ---------
				57	#
				58	# Here's a quote from the NCSA httpd docs about log file format.
				59	#
				60	# | The logfile format is as follows. Each line consists of:
				61	# |
				62	# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
				63	# |
				64	# | host: Either the DNS name or the IP number of the remote client
				65	# | rfc931: Any information returned by identd for this person,
				66	# | - otherwise.
				67	# | authuser: If user sent a userid for authentication, the user name,
				68	# | - otherwise.
				69	# | DD: Day
				70	# | Mon: Month (calendar name)
				71	# | YYYY: Year
				72	# | hh: hour (24-hour format, the machine's timezone)
				73	# | mm: minutes
				74	# | ss: seconds
				75	# | request: The first line of the HTTP request as sent by the client.
				76	# | ddd: the status code returned by the server, - if not available.
				77	# | bbbb: the total number of bytes sent,
				78	# | not including the HTTP/1.0 header, - if not available
				79	# |
				80	# | You can determine the name of the file accessed through request.
				81	#
				82	# (Actually, the latter is only true if you know the server configuration
				83	# at the time the request was made!)
				84
				85	__version__ = "0.6"
				86
				87	__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
				88
				89	import io
				90	import os
				91	import sys
				92	import cgi
				93	import time
				94	import socket # For gethostbyaddr()
				95	import shutil
				96	import urllib
				97	import select
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	98	import mimetypes
				99	import posixpath
				100	import socketserver
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	101	import email.message
				102	import email.parser
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	103
				104	# Default error message template
				105	DEFAULT_ERROR_MESSAGE = """\
				106	<head>
				107	<title>Error response</title>
				108	</head>
				109	<body>
				110	<h1>Error response</h1>
				111	<p>Error code %(code)d.
				112	<p>Message: %(message)s.
				113	<p>Error code explanation: %(code)s = %(explain)s.
				114	</body>
				115	"""
				116
				117	DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
				118
				119	def _quote_html(html):
				120	return html.replace("&", "&").replace("<", "<").replace(">", ">")
				121
				122	class HTTPServer(socketserver.TCPServer):
				123
				124	allow_reuse_address = 1 # Seems to make sense in testing environment
				125
				126	def server_bind(self):
				127	"""Override server_bind to store the server name."""
				128	socketserver.TCPServer.server_bind(self)
				129	host, port = self.socket.getsockname()[:2]
				130	self.server_name = socket.getfqdn(host)
				131	self.server_port = port
				132
				133
				134	class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
				135
				136	"""HTTP request handler base class.
				137
				138	The following explanation of HTTP serves to guide you through the
				139	code as well as to expose any misunderstandings I may have about
				140	HTTP (so you don't need to read the code to figure out I'm wrong
				141	:-).
				142
				143	HTTP (HyperText Transfer Protocol) is an extensible protocol on
				144	top of a reliable stream transport (e.g. TCP/IP). The protocol
				145	recognizes three parts to a request:
				146
				147	1. One line identifying the request type and path
				148	2. An optional set of RFC-822-style headers
				149	3. An optional data part
				150
				151	The headers and data are separated by a blank line.
				152
				153	The first line of the request has the form
				154
				155	<command> <path> <version>
				156
				157	where <command> is a (case-sensitive) keyword such as GET or POST,
				158	<path> is a string containing path information for the request,
				159	and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
				160	<path> is encoded using the URL encoding scheme (using %xx to signify
				161	the ASCII character with hex code xx).
				162
				163	The specification specifies that lines are separated by CRLF but
				164	for compatibility with the widest range of clients recommends
				165	servers also handle LF. Similarly, whitespace in the request line
				166	is treated sensibly (allowing multiple spaces between components
				167	and allowing trailing whitespace).
				168
				169	Similarly, for output, lines ought to be separated by CRLF pairs
				170	but most clients grok LF characters just fine.
				171
				172	If the first line of the request has the form
				173
				174	<command> <path>
				175
				176	(i.e. <version> is left out) then this is assumed to be an HTTP
				177	0.9 request; this form has no optional headers and data part and
				178	the reply consists of just the data.
				179
				180	The reply form of the HTTP 1.x protocol again has three parts:
				181
				182	1. One line giving the response code
				183	2. An optional set of RFC-822-style headers
				184	3. The data
				185
				186	Again, the headers and data are separated by a blank line.
				187
				188	The response code line has the form
				189
				190	<version> <responsecode> <responsestring>
				191
				192	where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
				193	<responsecode> is a 3-digit response code indicating success or
				194	failure of the request, and <responsestring> is an optional
				195	human-readable string explaining what the response code means.
				196
				197	This server parses the request and the headers, and then calls a
				198	function specific to the request type (<command>). Specifically,
				199	a request SPAM will be handled by a method do_SPAM(). If no
				200	such method exists the server sends an error response to the
				201	client. If it exists, it is called with no arguments:
				202
				203	do_SPAM()
				204
				205	Note that the request name is case sensitive (i.e. SPAM and spam
				206	are different requests).
				207
				208	The various request details are stored in instance variables:
				209
				210	- client_address is the client IP address in the form (host,
				211	port);
				212
				213	- command, path and version are the broken-down request line;
				214
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	215	- headers is an instance of email.message.Message (or a derived
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	216	class) containing the header information;
				217
				218	- rfile is a file object open for reading positioned at the
				219	start of the optional input data part;
				220
				221	- wfile is a file object open for writing.
				222
				223	IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
				224
				225	The first thing to be written must be the response line. Then
				226	follow 0 or more header lines, then a blank line, and then the
				227	actual data (if any). The meaning of the header lines depends on
				228	the command executed by the server; in most cases, when data is
				229	returned, there should be at least one header line of the form
				230
				231	Content-type: <type>/<subtype>
				232
				233	where <type> and <subtype> should be registered MIME types,
				234	e.g. "text/html" or "text/plain".
				235
				236	"""
				237
				238	# The Python system version, truncated to its first component.
				239	sys_version = "Python/" + sys.version.split()[0]
				240
				241	# The server software version. You may want to override this.
				242	# The format is multiple whitespace-separated strings,
				243	# where each string is of the form name[/version].
				244	server_version = "BaseHTTP/" + __version__
				245
				246	error_message_format = DEFAULT_ERROR_MESSAGE
				247	error_content_type = DEFAULT_ERROR_CONTENT_TYPE
				248
				249	# The default request version. This only affects responses up until
				250	# the point where the request line is parsed, so it mainly decides what
				251	# the client gets back when sending a malformed request line.
				252	# Most web servers default to HTTP 0.9, i.e. don't send a status line.
				253	default_request_version = "HTTP/0.9"
				254
				255	def parse_request(self):
				256	"""Parse a request (internal).
				257
				258	The request should be stored in self.raw_requestline; the results
				259	are in self.command, self.path, self.request_version and
				260	self.headers.
				261
				262	Return True for success, False for failure; on failure, an
				263	error is sent back.
				264
				265	"""
				266	self.command = None # set in case of error on the first line
				267	self.request_version = version = self.default_request_version
				268	self.close_connection = 1
				269	requestline = str(self.raw_requestline, 'iso-8859-1')
				270	if requestline[-2:] == '\r\n':
				271	requestline = requestline[:-2]
				272	elif requestline[-1:] == '\n':
				273	requestline = requestline[:-1]
				274	self.requestline = requestline
				275	words = requestline.split()
				276	if len(words) == 3:
				277	[command, path, version] = words
				278	if version[:5] != 'HTTP/':
				279	self.send_error(400, "Bad request version (%r)" % version)
				280	return False
				281	try:
				282	base_version_number = version.split('/', 1)[1]
				283	version_number = base_version_number.split(".")
				284	# RFC 2145 section 3.1 says there can be only one "." and
				285	# - major and minor numbers MUST be treated as
				286	# separate integers;
				287	# - HTTP/2.4 is a lower version than HTTP/2.13, which in
				288	# turn is lower than HTTP/12.3;
				289	# - Leading zeros MUST be ignored by recipients.
				290	if len(version_number) != 2:
				291	raise ValueError
				292	version_number = int(version_number[0]), int(version_number[1])
				293	except (ValueError, IndexError):
				294	self.send_error(400, "Bad request version (%r)" % version)
				295	return False
				296	if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
				297	self.close_connection = 0
				298	if version_number >= (2, 0):
				299	self.send_error(505,
				300	"Invalid HTTP Version (%s)" % base_version_number)
				301	return False
				302	elif len(words) == 2:
				303	[command, path] = words
				304	self.close_connection = 1
				305	if command != 'GET':
				306	self.send_error(400,
				307	"Bad HTTP/0.9 request type (%r)" % command)
				308	return False
				309	elif not words:
				310	return False
				311	else:
				312	self.send_error(400, "Bad request syntax (%r)" % requestline)
				313	return False
				314	self.command, self.path, self.request_version = command, path, version
				315
				316	# Examine the headers and look for a Connection directive.
				317
Georg Brandl	9f0f960	2008-06-12 22:23:59 +0000	[diff] [blame^]	318	# MessageClass wants to see strings rather than bytes.
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	319	# But a TextIOWrapper around self.rfile would buffer too many bytes
				320	# from the stream, bytes which we later need to read as bytes.
				321	# So we read the correct bytes here, as bytes, then use StringIO
				322	# to make them look like strings for MessageClass to parse.
				323	headers = []
				324	while True:
				325	line = self.rfile.readline()
				326	headers.append(line)
				327	if line in (b'\r\n', b'\n', b''):
				328	break
				329	hfile = io.StringIO(b''.join(headers).decode('iso-8859-1'))
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	330	self.headers = email.parser.Parser(_class=self.MessageClass).parse(hfile)
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	331
				332	conntype = self.headers.get('Connection', "")
				333	if conntype.lower() == 'close':
				334	self.close_connection = 1
				335	elif (conntype.lower() == 'keep-alive' and
				336	self.protocol_version >= "HTTP/1.1"):
				337	self.close_connection = 0
				338	return True
				339
				340	def handle_one_request(self):
				341	"""Handle a single HTTP request.
				342
				343	You normally don't need to override this method; see the class
				344	__doc__ string for information on how to handle specific HTTP
				345	commands such as GET and POST.
				346
				347	"""
				348	self.raw_requestline = self.rfile.readline()
				349	if not self.raw_requestline:
				350	self.close_connection = 1
				351	return
				352	if not self.parse_request(): # An error code has been sent, just exit
				353	return
				354	mname = 'do_' + self.command
				355	if not hasattr(self, mname):
				356	self.send_error(501, "Unsupported method (%r)" % self.command)
				357	return
				358	method = getattr(self, mname)
				359	method()
				360
				361	def handle(self):
				362	"""Handle multiple requests if necessary."""
				363	self.close_connection = 1
				364
				365	self.handle_one_request()
				366	while not self.close_connection:
				367	self.handle_one_request()
				368
				369	def send_error(self, code, message=None):
				370	"""Send and log an error reply.
				371
				372	Arguments are the error code, and a detailed message.
				373	The detailed message defaults to the short entry matching the
				374	response code.
				375
				376	This sends an error response (so it must be called before any
				377	output has been generated), logs the error, and finally sends
				378	a piece of HTML explaining the error to the user.
				379
				380	"""
				381
				382	try:
				383	shortmsg, longmsg = self.responses[code]
				384	except KeyError:
				385	shortmsg, longmsg = '???', '???'
				386	if message is None:
				387	message = shortmsg
				388	explain = longmsg
				389	self.log_error("code %d, message %s", code, message)
				390	# using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
				391	content = (self.error_message_format %
				392	{'code': code, 'message': _quote_html(message), 'explain': explain})
				393	self.send_response(code, message)
				394	self.send_header("Content-Type", self.error_content_type)
				395	self.send_header('Connection', 'close')
				396	self.end_headers()
				397	if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
				398	self.wfile.write(content.encode('UTF-8', 'replace'))
				399
				400	def send_response(self, code, message=None):
				401	"""Send the response header and log the response code.
				402
				403	Also send two standard headers with the server software
				404	version and the current date.
				405
				406	"""
				407	self.log_request(code)
				408	if message is None:
				409	if code in self.responses:
				410	message = self.responses[code][0]
				411	else:
				412	message = ''
				413	if self.request_version != 'HTTP/0.9':
				414	self.wfile.write(("%s %d %s\r\n" %
				415	(self.protocol_version, code, message)).encode('ASCII', 'strict'))
				416	# print (self.protocol_version, code, message)
				417	self.send_header('Server', self.version_string())
				418	self.send_header('Date', self.date_time_string())
				419
				420	def send_header(self, keyword, value):
				421	"""Send a MIME header."""
				422	if self.request_version != 'HTTP/0.9':
				423	self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict'))
				424
				425	if keyword.lower() == 'connection':
				426	if value.lower() == 'close':
				427	self.close_connection = 1
				428	elif value.lower() == 'keep-alive':
				429	self.close_connection = 0
				430
				431	def end_headers(self):
				432	"""Send the blank line ending the MIME headers."""
				433	if self.request_version != 'HTTP/0.9':
				434	self.wfile.write(b"\r\n")
				435
				436	def log_request(self, code='-', size='-'):
				437	"""Log an accepted request.
				438
				439	This is called by send_response().
				440
				441	"""
				442
				443	self.log_message('"%s" %s %s',
				444	self.requestline, str(code), str(size))
				445
				446	def log_error(self, format, *args):
				447	"""Log an error.
				448
				449	This is called when a request cannot be fulfilled. By
				450	default it passes the message on to log_message().
				451
				452	Arguments are the same as for log_message().
				453
				454	XXX This should go to the separate error log.
				455
				456	"""
				457
				458	self.log_message(format, *args)
				459
				460	def log_message(self, format, *args):
				461	"""Log an arbitrary message.
				462
				463	This is used by all other logging functions. Override
				464	it if you have specific logging wishes.
				465
				466	The first argument, FORMAT, is a format string for the
				467	message to be logged. If the format string contains
				468	any % escapes requiring parameters, they should be
				469	specified as subsequent arguments (it's just like
				470	printf!).
				471
				472	The client host and current date/time are prefixed to
				473	every message.
				474
				475	"""
				476
				477	sys.stderr.write("%s - - [%s] %s\n" %
				478	(self.address_string(),
				479	self.log_date_time_string(),
				480	format%args))
				481
				482	def version_string(self):
				483	"""Return the server software version string."""
				484	return self.server_version + ' ' + self.sys_version
				485
				486	def date_time_string(self, timestamp=None):
				487	"""Return the current date and time formatted for a message header."""
				488	if timestamp is None:
				489	timestamp = time.time()
				490	year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
				491	s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
				492	self.weekdayname[wd],
				493	day, self.monthname[month], year,
				494	hh, mm, ss)
				495	return s
				496
				497	def log_date_time_string(self):
				498	"""Return the current time formatted for logging."""
				499	now = time.time()
				500	year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
				501	s = "%02d/%3s/%04d %02d:%02d:%02d" % (
				502	day, self.monthname[month], year, hh, mm, ss)
				503	return s
				504
				505	weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
				506
				507	monthname = [None,
				508	'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
				509	'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
				510
				511	def address_string(self):
				512	"""Return the client address formatted for logging.
				513
				514	This version looks up the full hostname using gethostbyaddr(),
				515	and tries to find a name that contains at least one dot.
				516
				517	"""
				518
				519	host, port = self.client_address[:2]
				520	return socket.getfqdn(host)
				521
				522	# Essentially static class variables
				523
				524	# The version of the HTTP protocol we support.
				525	# Set this to HTTP/1.1 to enable automatic keepalive
				526	protocol_version = "HTTP/1.0"
				527
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	528	# MessageClass used to parse headers
				529	import http.client
				530	MessageClass = http.client.HTTPMessage
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	531
				532	# Table mapping response codes to messages; entries have the
				533	# form {code: (shortmessage, longmessage)}.
				534	# See RFC 2616.
				535	responses = {
				536	100: ('Continue', 'Request received, please continue'),
				537	101: ('Switching Protocols',
				538	'Switching to new protocol; obey Upgrade header'),
				539
				540	200: ('OK', 'Request fulfilled, document follows'),
				541	201: ('Created', 'Document created, URL follows'),
				542	202: ('Accepted',
				543	'Request accepted, processing continues off-line'),
				544	203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
				545	204: ('No Content', 'Request fulfilled, nothing follows'),
				546	205: ('Reset Content', 'Clear input form for further input.'),
				547	206: ('Partial Content', 'Partial content follows.'),
				548
				549	300: ('Multiple Choices',
				550	'Object has several resources -- see URI list'),
				551	301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
				552	302: ('Found', 'Object moved temporarily -- see URI list'),
				553	303: ('See Other', 'Object moved -- see Method and URL list'),
				554	304: ('Not Modified',
				555	'Document has not changed since given time'),
				556	305: ('Use Proxy',
				557	'You must use proxy specified in Location to access this '
				558	'resource.'),
				559	307: ('Temporary Redirect',
				560	'Object moved temporarily -- see URI list'),
				561
				562	400: ('Bad Request',
				563	'Bad request syntax or unsupported method'),
				564	401: ('Unauthorized',
				565	'No permission -- see authorization schemes'),
				566	402: ('Payment Required',
				567	'No payment -- see charging schemes'),
				568	403: ('Forbidden',
				569	'Request forbidden -- authorization will not help'),
				570	404: ('Not Found', 'Nothing matches the given URI'),
				571	405: ('Method Not Allowed',
				572	'Specified method is invalid for this server.'),
				573	406: ('Not Acceptable', 'URI not available in preferred format.'),
				574	407: ('Proxy Authentication Required', 'You must authenticate with '
				575	'this proxy before proceeding.'),
				576	408: ('Request Timeout', 'Request timed out; try again later.'),
				577	409: ('Conflict', 'Request conflict.'),
				578	410: ('Gone',
				579	'URI no longer exists and has been permanently removed.'),
				580	411: ('Length Required', 'Client must specify Content-Length.'),
				581	412: ('Precondition Failed', 'Precondition in headers is false.'),
				582	413: ('Request Entity Too Large', 'Entity is too large.'),
				583	414: ('Request-URI Too Long', 'URI is too long.'),
				584	415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
				585	416: ('Requested Range Not Satisfiable',
				586	'Cannot satisfy request range.'),
				587	417: ('Expectation Failed',
				588	'Expect condition could not be satisfied.'),
				589
				590	500: ('Internal Server Error', 'Server got itself in trouble'),
				591	501: ('Not Implemented',
				592	'Server does not support this operation'),
				593	502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
				594	503: ('Service Unavailable',
				595	'The server cannot process the request due to a high load'),
				596	504: ('Gateway Timeout',
				597	'The gateway server did not receive a timely response'),
				598	505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
				599	}
				600
				601
				602	class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
				603
				604	"""Simple HTTP request handler with GET and HEAD commands.
				605
				606	This serves files from the current directory and any of its
				607	subdirectories. The MIME type for files is determined by
				608	calling the .guess_type() method.
				609
				610	The GET and HEAD requests are identical except that the HEAD
				611	request omits the actual contents of the file.
				612
				613	"""
				614
				615	server_version = "SimpleHTTP/" + __version__
				616
				617	def do_GET(self):
				618	"""Serve a GET request."""
				619	f = self.send_head()
				620	if f:
				621	self.copyfile(f, self.wfile)
				622	f.close()
				623
				624	def do_HEAD(self):
				625	"""Serve a HEAD request."""
				626	f = self.send_head()
				627	if f:
				628	f.close()
				629
				630	def send_head(self):
				631	"""Common code for GET and HEAD commands.
				632
				633	This sends the response code and MIME headers.
				634
				635	Return value is either a file object (which has to be copied
				636	to the outputfile by the caller unless the command was HEAD,
				637	and must be closed by the caller under all circumstances), or
				638	None, in which case the caller has nothing further to do.
				639
				640	"""
				641	path = self.translate_path(self.path)
				642	f = None
				643	if os.path.isdir(path):
				644	if not self.path.endswith('/'):
				645	# redirect browser - doing basically what apache does
				646	self.send_response(301)
				647	self.send_header("Location", self.path + "/")
				648	self.end_headers()
				649	return None
				650	for index in "index.html", "index.htm":
				651	index = os.path.join(path, index)
				652	if os.path.exists(index):
				653	path = index
				654	break
				655	else:
				656	return self.list_directory(path)
				657	ctype = self.guess_type(path)
				658	try:
				659	f = open(path, 'rb')
				660	except IOError:
				661	self.send_error(404, "File not found")
				662	return None
				663	self.send_response(200)
				664	self.send_header("Content-type", ctype)
				665	fs = os.fstat(f.fileno())
				666	self.send_header("Content-Length", str(fs[6]))
				667	self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
				668	self.end_headers()
				669	return f
				670
				671	def list_directory(self, path):
				672	"""Helper to produce a directory listing (absent index.html).
				673
				674	Return value is either a file object, or None (indicating an
				675	error). In either case, the headers are sent, making the
				676	interface the same as for send_head().
				677
				678	"""
				679	try:
				680	list = os.listdir(path)
				681	except os.error:
				682	self.send_error(404, "No permission to list directory")
				683	return None
				684	list.sort(key=lambda a: a.lower())
				685	r = []
				686	displaypath = cgi.escape(urllib.unquote(self.path))
				687	r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
				688	r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
				689	r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
				690	r.append("<hr>\n<ul>\n")
				691	for name in list:
				692	fullname = os.path.join(path, name)
				693	displayname = linkname = name
				694	# Append / for directories or @ for symbolic links
				695	if os.path.isdir(fullname):
				696	displayname = name + "/"
				697	linkname = name + "/"
				698	if os.path.islink(fullname):
				699	displayname = name + "@"
				700	# Note: a link to a directory displays with @ and links with /
				701	r.append('<li><a href="%s">%s</a>\n'
				702	% (urllib.quote(linkname), cgi.escape(displayname)))
				703	r.append("</ul>\n<hr>\n</body>\n</html>\n")
				704	enc = sys.getfilesystemencoding()
				705	encoded = ''.join(r).encode(enc)
				706	f = io.BytesIO()
				707	f.write(encoded)
				708	f.seek(0)
				709	self.send_response(200)
				710	self.send_header("Content-type", "text/html; charset=%s" % enc)
				711	self.send_header("Content-Length", str(len(encoded)))
				712	self.end_headers()
				713	return f
				714
				715	def translate_path(self, path):
				716	"""Translate a /-separated PATH to the local filename syntax.
				717
				718	Components that mean special things to the local file system
				719	(e.g. drive or directory names) are ignored. (XXX They should
				720	probably be diagnosed.)
				721
				722	"""
				723	# abandon query parameters
				724	path = path.split('?',1)[0]
				725	path = path.split('#',1)[0]
				726	path = posixpath.normpath(urllib.unquote(path))
				727	words = path.split('/')
				728	words = filter(None, words)
				729	path = os.getcwd()
				730	for word in words:
				731	drive, word = os.path.splitdrive(word)
				732	head, word = os.path.split(word)
				733	if word in (os.curdir, os.pardir): continue
				734	path = os.path.join(path, word)
				735	return path
				736
				737	def copyfile(self, source, outputfile):
				738	"""Copy all data between two file objects.
				739
				740	The SOURCE argument is a file object open for reading
				741	(or anything with a read() method) and the DESTINATION
				742	argument is a file object open for writing (or
				743	anything with a write() method).
				744
				745	The only reason for overriding this would be to change
				746	the block size or perhaps to replace newlines by CRLF
				747	-- note however that this the default server uses this
				748	to copy binary data as well.
				749
				750	"""
				751	shutil.copyfileobj(source, outputfile)
				752
				753	def guess_type(self, path):
				754	"""Guess the type of a file.
				755
				756	Argument is a PATH (a filename).
				757
				758	Return value is a string of the form type/subtype,
				759	usable for a MIME Content-type header.
				760
				761	The default implementation looks the file's extension
				762	up in the table self.extensions_map, using application/octet-stream
				763	as a default; however it would be permissible (if
				764	slow) to look inside the data to make a better guess.
				765
				766	"""
				767
				768	base, ext = posixpath.splitext(path)
				769	if ext in self.extensions_map:
				770	return self.extensions_map[ext]
				771	ext = ext.lower()
				772	if ext in self.extensions_map:
				773	return self.extensions_map[ext]
				774	else:
				775	return self.extensions_map['']
				776
				777	if not mimetypes.inited:
				778	mimetypes.init() # try to read system mime.types
				779	extensions_map = mimetypes.types_map.copy()
				780	extensions_map.update({
				781	'': 'application/octet-stream', # Default
				782	'.py': 'text/plain',
				783	'.c': 'text/plain',
				784	'.h': 'text/plain',
				785	})
				786
				787
				788	# Utilities for CGIHTTPRequestHandler
				789
				790	nobody = None
				791
				792	def nobody_uid():
				793	"""Internal routine to get nobody's uid"""
				794	global nobody
				795	if nobody:
				796	return nobody
				797	try:
				798	import pwd
				799	except ImportError:
				800	return -1
				801	try:
				802	nobody = pwd.getpwnam('nobody')[2]
				803	except KeyError:
				804	nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
				805	return nobody
				806
				807
				808	def executable(path):
				809	"""Test for executable file."""
				810	try:
				811	st = os.stat(path)
				812	except os.error:
				813	return False
				814	return st.st_mode & 0o111 != 0
				815
				816
				817	class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
				818
				819	"""Complete HTTP server with GET, HEAD and POST commands.
				820
				821	GET and HEAD also support running CGI scripts.
				822
				823	The POST command is only implemented for CGI scripts.
				824
				825	"""
				826
				827	# Determine platform specifics
				828	have_fork = hasattr(os, 'fork')
				829	have_popen2 = hasattr(os, 'popen2')
				830	have_popen3 = hasattr(os, 'popen3')
				831
				832	# Make rfile unbuffered -- we need to read one line and then pass
				833	# the rest to a subprocess, so we can't use buffered input.
				834	rbufsize = 0
				835
				836	def do_POST(self):
				837	"""Serve a POST request.
				838
				839	This is only implemented for CGI scripts.
				840
				841	"""
				842
				843	if self.is_cgi():
				844	self.run_cgi()
				845	else:
				846	self.send_error(501, "Can only POST to CGI scripts")
				847
				848	def send_head(self):
				849	"""Version of send_head that support CGI scripts"""
				850	if self.is_cgi():
				851	return self.run_cgi()
				852	else:
				853	return SimpleHTTPRequestHandler.send_head(self)
				854
				855	def is_cgi(self):
				856	"""Test whether self.path corresponds to a CGI script.
				857
				858	Return a tuple (dir, rest) if self.path requires running a
				859	CGI script, None if not. Note that rest begins with a
				860	slash if it is not empty.
				861
				862	The default implementation tests whether the path
				863	begins with one of the strings in the list
				864	self.cgi_directories (and the next character is a '/'
				865	or the end of the string).
				866
				867	"""
				868
				869	path = self.path
				870
				871	for x in self.cgi_directories:
				872	i = len(x)
				873	if path[:i] == x and (not path[i:] or path[i] == '/'):
				874	self.cgi_info = path[:i], path[i+1:]
				875	return True
				876	return False
				877
				878	cgi_directories = ['/cgi-bin', '/htbin']
				879
				880	def is_executable(self, path):
				881	"""Test whether argument path is an executable file."""
				882	return executable(path)
				883
				884	def is_python(self, path):
				885	"""Test whether argument path is a Python script."""
				886	head, tail = os.path.splitext(path)
				887	return tail.lower() in (".py", ".pyw")
				888
				889	def run_cgi(self):
				890	"""Execute a CGI script."""
				891	path = self.path
				892	dir, rest = self.cgi_info
				893
				894	i = path.find('/', len(dir) + 1)
				895	while i >= 0:
				896	nextdir = path[:i]
				897	nextrest = path[i+1:]
				898
				899	scriptdir = self.translate_path(nextdir)
				900	if os.path.isdir(scriptdir):
				901	dir, rest = nextdir, nextrest
				902	i = path.find('/', len(dir) + 1)
				903	else:
				904	break
				905
				906	# find an explicit query string, if present.
				907	i = rest.rfind('?')
				908	if i >= 0:
				909	rest, query = rest[:i], rest[i+1:]
				910	else:
				911	query = ''
				912
				913	# dissect the part after the directory name into a script name &
				914	# a possible additional path, to be stored in PATH_INFO.
				915	i = rest.find('/')
				916	if i >= 0:
				917	script, rest = rest[:i], rest[i:]
				918	else:
				919	script, rest = rest, ''
				920
				921	scriptname = dir + '/' + script
				922	scriptfile = self.translate_path(scriptname)
				923	if not os.path.exists(scriptfile):
				924	self.send_error(404, "No such CGI script (%r)" % scriptname)
				925	return
				926	if not os.path.isfile(scriptfile):
				927	self.send_error(403, "CGI script is not a plain file (%r)" %
				928	scriptname)
				929	return
				930	ispy = self.is_python(scriptname)
				931	if not ispy:
				932	if not (self.have_fork or self.have_popen2 or self.have_popen3):
				933	self.send_error(403, "CGI script is not a Python script (%r)" %
				934	scriptname)
				935	return
				936	if not self.is_executable(scriptfile):
				937	self.send_error(403, "CGI script is not executable (%r)" %
				938	scriptname)
				939	return
				940
				941	# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
				942	# XXX Much of the following could be prepared ahead of time!
				943	env = {}
				944	env['SERVER_SOFTWARE'] = self.version_string()
				945	env['SERVER_NAME'] = self.server.server_name
				946	env['GATEWAY_INTERFACE'] = 'CGI/1.1'
				947	env['SERVER_PROTOCOL'] = self.protocol_version
				948	env['SERVER_PORT'] = str(self.server.server_port)
				949	env['REQUEST_METHOD'] = self.command
				950	uqrest = urllib.unquote(rest)
				951	env['PATH_INFO'] = uqrest
				952	env['PATH_TRANSLATED'] = self.translate_path(uqrest)
				953	env['SCRIPT_NAME'] = scriptname
				954	if query:
				955	env['QUERY_STRING'] = query
				956	host = self.address_string()
				957	if host != self.client_address[0]:
				958	env['REMOTE_HOST'] = host
				959	env['REMOTE_ADDR'] = self.client_address[0]
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	960	authorization = self.headers.get("authorization")
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	961	if authorization:
				962	authorization = authorization.split()
				963	if len(authorization) == 2:
				964	import base64, binascii
				965	env['AUTH_TYPE'] = authorization[0]
				966	if authorization[0].lower() == "basic":
				967	try:
				968	authorization = authorization[1].encode('ascii')
				969	authorization = base64.decodestring(authorization).\
				970	decode('ascii')
				971	except (binascii.Error, UnicodeError):
				972	pass
				973	else:
				974	authorization = authorization.split(':')
				975	if len(authorization) == 2:
				976	env['REMOTE_USER'] = authorization[0]
				977	# XXX REMOTE_IDENT
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	978	if self.headers.get('content-type') is None:
				979	env['CONTENT_TYPE'] = self.headers.get_content_type()
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	980	else:
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	981	env['CONTENT_TYPE'] = self.headers['content-type']
				982	length = self.headers.get('content-length')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	983	if length:
				984	env['CONTENT_LENGTH'] = length
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	985	referer = self.headers.get('referer')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	986	if referer:
				987	env['HTTP_REFERER'] = referer
				988	accept = []
				989	for line in self.headers.getallmatchingheaders('accept'):
				990	if line[:1] in "\t\n\r ":
				991	accept.append(line.strip())
				992	else:
				993	accept = accept + line[7:].split(',')
				994	env['HTTP_ACCEPT'] = ','.join(accept)
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	995	ua = self.headers.get('user-agent')
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	996	if ua:
				997	env['HTTP_USER_AGENT'] = ua
Barry Warsaw	820c120	2008-06-12 04:06:45 +0000	[diff] [blame]	998	co = filter(None, self.headers.get_all('cookie', []))
Georg Brandl	2442015	2008-05-26 16:32:26 +0000	[diff] [blame]	999	if co:
				1000	env['HTTP_COOKIE'] = ', '.join(co)
				1001	# XXX Other HTTP_* headers
				1002	# Since we're setting the env in the parent, provide empty
				1003	# values to override previously set values
				1004	for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
				1005	'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
				1006	env.setdefault(k, "")
				1007	os.environ.update(env)
				1008
				1009	self.send_response(200, "Script output follows")
				1010
				1011	decoded_query = query.replace('+', ' ')
				1012
				1013	if self.have_fork:
				1014	# Unix -- fork as we should
				1015	args = [script]
				1016	if '=' not in decoded_query:
				1017	args.append(decoded_query)
				1018	nobody = nobody_uid()
				1019	self.wfile.flush() # Always flush before forking
				1020	pid = os.fork()
				1021	if pid != 0:
				1022	# Parent
				1023	pid, sts = os.waitpid(pid, 0)
				1024	# throw away additional data [see bug #427345]
				1025	while select.select([self.rfile], [], [], 0)[0]:
				1026	if not self.rfile.read(1):
				1027	break
				1028	if sts:
				1029	self.log_error("CGI script exit status %#x", sts)
				1030	return
				1031	# Child
				1032	try:
				1033	try:
				1034	os.setuid(nobody)
				1035	except os.error:
				1036	pass
				1037	os.dup2(self.rfile.fileno(), 0)
				1038	os.dup2(self.wfile.fileno(), 1)
				1039	os.execve(scriptfile, args, os.environ)
				1040	except:
				1041	self.server.handle_error(self.request, self.client_address)
				1042	os._exit(127)
				1043
				1044	elif self.have_popen2 or self.have_popen3:
				1045	# Windows -- use popen2 or popen3 to create a subprocess
				1046	import shutil
				1047	if self.have_popen3:
				1048	popenx = os.popen3
				1049	else:
				1050	popenx = os.popen2
				1051	cmdline = scriptfile
				1052	if self.is_python(scriptfile):
				1053	interp = sys.executable
				1054	if interp.lower().endswith("w.exe"):
				1055	# On Windows, use python.exe, not pythonw.exe
				1056	interp = interp[:-5] + interp[-4:]
				1057	cmdline = "%s -u %s" % (interp, cmdline)
				1058	if '=' not in query and '"' not in query:
				1059	cmdline = '%s "%s"' % (cmdline, query)
				1060	self.log_message("command: %s", cmdline)
				1061	try:
				1062	nbytes = int(length)
				1063	except (TypeError, ValueError):
				1064	nbytes = 0
				1065	files = popenx(cmdline, 'b')
				1066	fi = files[0]
				1067	fo = files[1]
				1068	if self.have_popen3:
				1069	fe = files[2]
				1070	if self.command.lower() == "post" and nbytes > 0:
				1071	data = self.rfile.read(nbytes)
				1072	fi.write(data)
				1073	# throw away additional data [see bug #427345]
				1074	while select.select([self.rfile._sock], [], [], 0)[0]:
				1075	if not self.rfile._sock.recv(1):
				1076	break
				1077	fi.close()
				1078	shutil.copyfileobj(fo, self.wfile)
				1079	if self.have_popen3:
				1080	errors = fe.read()
				1081	fe.close()
				1082	if errors:
				1083	self.log_error('%s', errors)
				1084	sts = fo.close()
				1085	if sts:
				1086	self.log_error("CGI script exit status %#x", sts)
				1087	else:
				1088	self.log_message("CGI script exited OK")
				1089
				1090	else:
				1091	# Other O.S. -- execute script in this process
				1092	save_argv = sys.argv
				1093	save_stdin = sys.stdin
				1094	save_stdout = sys.stdout
				1095	save_stderr = sys.stderr
				1096	try:
				1097	save_cwd = os.getcwd()
				1098	try:
				1099	sys.argv = [scriptfile]
				1100	if '=' not in decoded_query:
				1101	sys.argv.append(decoded_query)
				1102	sys.stdout = self.wfile
				1103	sys.stdin = self.rfile
				1104	exec(open(scriptfile).read(), {"__name__": "__main__"})
				1105	finally:
				1106	sys.argv = save_argv
				1107	sys.stdin = save_stdin
				1108	sys.stdout = save_stdout
				1109	sys.stderr = save_stderr
				1110	os.chdir(save_cwd)
				1111	except SystemExit as sts:
				1112	self.log_error("CGI script exit status %s", str(sts))
				1113	else:
				1114	self.log_message("CGI script exited OK")
				1115
				1116
				1117	def test(HandlerClass = BaseHTTPRequestHandler,
				1118	ServerClass = HTTPServer, protocol="HTTP/1.0"):
				1119	"""Test the HTTP request handler class.
				1120
				1121	This runs an HTTP server on port 8000 (or the first command line
				1122	argument).
				1123
				1124	"""
				1125
				1126	if sys.argv[1:]:
				1127	port = int(sys.argv[1])
				1128	else:
				1129	port = 8000
				1130	server_address = ('', port)
				1131
				1132	HandlerClass.protocol_version = protocol
				1133	httpd = ServerClass(server_address, HandlerClass)
				1134
				1135	sa = httpd.socket.getsockname()
				1136	print("Serving HTTP on", sa[0], "port", sa[1], "...")
				1137	httpd.serve_forever()
				1138
				1139
				1140	if __name__ == '__main__':
				1141	test(HandlerClass=BaseHTTPRequestHandler)
				1142	test(HandlerClass=SimpleHTTPRequestHandler)
				1143	test(HandlerClass=CGIHTTPRequestHandler)