blob: 543abe0c9f949a46eb1bb5e39ff467a75455dee1 [file] [log] [blame]
Georg Brandl24420152008-05-26 16:32:26 +00001"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (including POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
Amaury Forgeot d'Arccb0d2d72008-06-18 22:19:22 +000016subprocess.Popen() is used as a fallback, with slightly altered semantics.
Georg Brandl24420152008-05-26 16:32:26 +000017
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
__version__ = "0.6"

# Public API: the server class plus every request handler class defined in
# this module, so that a star-import exposes all of them.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl1f7fffb2010-10-15 15:57:45 +000087import html
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran42713722010-10-03 17:55:45 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
# Default error message template.  send_error() %-formats it with a mapping
# that supplies the keys 'code', 'message' and 'explain'; 'code' is used
# with both %d and %s below.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Default value of the Content-Type header that send_error() sends along
# with the page rendered from the template above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
118
def _quote_html(html):
    """Return *html* with '&', '<' and '>' replaced by HTML entities."""
    # '&' has to be handled first so that the ampersands introduced by the
    # other two replacements are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        html = html.replace(raw, entity)
    return html
121
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port when bound."""

    # Seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
132
133
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure.  On failure an error
        response is sent back, except when the request line contains no
        words at all, in which case False is returned silently.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip exactly one line terminator (CRLF or bare LF).
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #     separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #     turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(505,
                                "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: treated as an HTTP/0.9 request.
            command, path = words
            self.close_connection = True
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong:
            self.send_error(400, "Line too long")
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive.
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(100)
        # The interim response is only complete once the blank line that
        # terminates its (empty) header section has been written.
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one of exactly the maximum size.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                # send_error() reads these attributes; give them values
                # because parse_request() has not run.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML body
        is omitted for HEAD requests and for codes below 200, 204 and 304.

        """
        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # Escape the message to prevent Cross Site Scripting attacks
        # (see bug #1100201); quote=False escapes only '&', '<' and '>'.
        content = (self.error_message_format %
                   {'code': code,
                    'message': html.escape(message, quote=False),
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        The status line is written straight to wfile; nothing is written
        when the request version is HTTP/0.9.  If message is None, the
        short message for code from self.responses is used, or an empty
        string when the code is unknown.

        """
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s %d %s\r\n" %
                              (self.protocol_version, code, message)
                              ).encode('latin1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header.

        The header is buffered and only written out by end_headers().
        A 'Connection' header with the value 'close' or 'keep-alive' also
        updates self.close_connection.

        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers.

        All headers buffered by send_header() are written in one go,
        followed by the blank line.  Nothing is written for HTTP/0.9.

        """
        if self.request_version != 'HTTP/0.9':
            # send_header() creates the buffer lazily, so it may not exist
            # yet when the response carries no headers at all.
            pending = getattr(self, '_headers_buffer', [])
            pending.append(b"\r\n")
            self.wfile.write(b"".join(pending))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp is given it is formatted instead of the current time.
        The result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the result.

        """
        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
637
638
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even if the copy fails part-way (for example
            # because the client went away).
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file found: fall back to a directory listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file in this case, so it has
            # to be closed here before the error propagates.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            names = os.listdir(path)
        except OSError:
            self.send_error(404, "No permission to list directory")
            return None
        names.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib.parse.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in names:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
                     % (urllib.parse.quote(linkname), html.escape(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is rooted at the current working directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = filter(None, path.split('/'))
        path = os.getcwd()
        for word in words:
            # Keep only the final component, without any drive prefix.
            word = os.path.splitdrive(word)[1]
            word = os.path.split(word)[1]
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """
        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then its lower-case form.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
823
824
825# Utilities for CGIHTTPRequestHandler
826
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000827# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalize a URL path and split it at its final '/'.

    Extra '/'s and '.' elements are dropped and '..' references are
    collapsed, in a way akin to RFC-2396 5.2 step 6 for relative paths.
    This is similar to os.path.split(os.path.normpath(path)) but follows
    URL path semantics rather than local operating system semantics.

    Returns: A tuple of (head, tail) where tail is everything after the final /
    and head is everything before it.  Head will always start with a '/' and,
    if it contains anything else, never have a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # A '.' element is equivalent to an empty one.
    segments = ['' if seg == '.' else seg for seg in path.split('/')]
    # str.split() always yields at least one item, so a tail always exists.
    *leading, tail_part = segments
    stack = []
    for seg in leading:
        if not seg:
            # Blank non-trailing elements are dropped before '..' is applied.
            continue
        if seg == '..':
            stack.pop()
        else:
            stack.append(seg)
    if tail_part == '..':
        stack.pop()
        tail_part = ''
    return ('/' + '/'.join(stack), tail_part)
865
866
# Cached result of nobody_uid(); None until a uid has been determined.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the uid
    of the 'nobody' account is returned; if there is no such account, one
    more than the largest uid in the password database is used instead.
    The result is remembered in the module-level variable ``nobody``.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
883
884
def executable(path):
    """Return True if *path* can be stat'ed and has any execute bit set."""
    try:
        mode = os.stat(path).st_mode
    except OSError:
        # A path that cannot be stat'ed is reported as not executable.
        return False
    return bool(mode & 0o111)
892
893
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other path gets a
        501 error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Runs the CGI script when the path is a CGI path, otherwise defers
        to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation collapses the URL path and tests
        whether the directory part is one of the strings in
        self.cgi_directories.

        """

        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    # Collapsed URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        extension = os.path.splitext(path)[1]
        return extension.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Expects is_cgi() to have stored (dir, rest) in self.cgi_info.
        Locates the script file, builds the CGI environment from the
        request, sends the 200 status line, and then runs the script:
        through os.fork()/os.execve() when os.fork is available, otherwise
        through subprocess.Popen().

        Sends a 404 error if the script does not exist and a 403 error if
        it is not a plain file or is not executable; in those cases the
        script is not run.

        """
        path = self.path
        dir, rest = self.cgi_info

        # Extend `dir` through any further path components that are real
        # directories on disk, so the script may live in a subdirectory.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # The fork branch below hands the file straight to os.execve(), so
        # there even a Python script must be executable; only the
        # subprocess branch runs Python scripts through the interpreter.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect the value of every Accept header with get_all(), the same
        # call used for cookies below.  getallmatchingheaders() never
        # matches on Python 3's HTTPMessage, which left HTTP_ACCEPT empty.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it can't change.
                    pass
                # Wire the request streams to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately bare: whatever went wrong, the child must
                # report it and exit rather than return into the caller.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body to forward.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1156
1157
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    Starts a ServerClass instance that uses HandlerClass, bound to all
    interfaces on port 8000 (or on the port given as the first command
    line argument), and serves until interrupted from the keyboard, at
    which point the server is closed and the process exits with status 0.

    """

    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    # Report the address the socket is actually bound to.
    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001184
# When run as a script, start the test server with
# SimpleHTTPRequestHandler as the handler class.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)