blob: c0245ecc06d68e57245a66c61996bfe80a581078 [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (including POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
82
# Version of this module; embedded in the default Server header via the
# handlers' ``server_version`` attributes.
__version__ = "0.6"

# Public API exported by ``from <module> import *``.  The two concrete
# request handlers are documented as public in the module docstring, so
# they are exported alongside the server and the base handler.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl1f7fffb2010-10-15 15:57:45 +000087import html
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran42713722010-10-03 17:55:45 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
# Default error message template.
# BaseHTTPRequestHandler uses this as its ``error_message_format``; it is
# filled in by send_error() with a %-mapping that provides the keys
# 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <title>Error response</title>
 </head>
 <body>
 <h1>Error response</h1>
 <p>Error code: %(code)d</p>
 <p>Message: %(message)s.</p>
 <p>Error code explanation: %(code)s - %(explain)s.</p>
 </body>
</html>
"""

# Default value of BaseHTTPRequestHandler.error_content_type, i.e. the
# Content-Type header sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
123
def _quote_html(html):
    """Return *html* with '&', '<' and '>' replaced by HTML entities."""
    # '&' has to be handled first; otherwise the ampersands introduced by
    # the '<' and '>' entities would themselves be escaped again.
    escaped = html
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(raw, entity)
    return escaped
126
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the listening socket, then record server_name/server_port.

        The name is the fully qualified form of the host the socket was
        actually bound to; the port is taken from the same socket address.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
137
138
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a completely blank request line,
        which is rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip exactly one line terminator (CRLF preferred, bare LF accepted).
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong:
            self.send_error(400, "Line too long")
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(100)
        # An interim response is a complete message of its own: the status
        # line must be followed by the blank line that ends the (empty)
        # header section, otherwise the client would read the final
        # response's status line as a header of the 100 response.
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # Peer closed the connection.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush()  # actually send the response if not already done.
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests or for status codes that forbid one.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        Writes just the status line (nothing at all for HTTP/0.9
        requests); *message* defaults to the short text registered for
        *code* in self.responses, or '' for unknown codes.

        """
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode('latin1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header.

        The header is buffered and written out by end_headers().  A
        'Connection' header additionally updates self.close_connection.

        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers.

        Any headers buffered by send_header() are written first.  Calling
        this without having buffered a header is allowed and writes only
        the terminating blank line.

        """
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by send_header(); it may not
            # exist yet when a response carries no headers at all.
            pending = getattr(self, '_headers_buffer', [])
            pending.append(b"\r\n")
            self.wfile.write(b"".join(pending))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        *timestamp* is seconds since the epoch and defaults to now; the
        result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by time.struct_time.tm_wday (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by month number 1..12; slot 0 is a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() to
        obtain a fully qualified host name.

        """

        host, _port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
642
643
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even if the client disconnects mid-transfer
            # and copyfile() raises.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        # From here on the file is open: if anything fails before it is
        # handed to the caller, close it instead of leaking the descriptor.
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib.parse.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            # File names that are not valid in the filesystem encoding are
            # returned by os.listdir() with lone surrogates; 'surrogatepass'
            # lets them be percent-encoded instead of raising.
            r.append('<li><a href="%s">%s</a>\n'
                    % (urllib.parse.quote(linkname, errors='surrogatepass'),
                       html.escape(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        # 'surrogateescape' turns such surrogates back into the original
        # bytes rather than failing the whole listing.
        encoded = ''.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        # Accept the percent-encoded surrogates that list_directory() emits
        # for undecodable file names; fall back to the lenient default for
        # any other byte sequence that is not valid UTF-8.
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Keep only the final path component of each word so a request
            # cannot smuggle in a drive letter or a directory separator.
            _drive, word = os.path.splitdrive(word)
            _head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then case-insensitively.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
828
829
830# Utilities for CGIHTTPRequestHandler
831
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000832# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalize a URL path and split it at its final '/'.

    Extra '/'s and '.' elements are dropped and every '..' consumes the
    element before it, in the spirit of RFC-2396 5.2 step 6.  This mirrors
    os.path.split(os.path.normpath(path)) but follows URL path rules
    instead of those of the local operating system.

    Returns: A tuple of (head, tail) where tail is everything after the final /
    and head is everything before it.  Head will always start with a '/' and,
    if it contains anything else, never have a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # A '.' element is equivalent to an empty one.
    segments = ['' if seg == '.' else seg for seg in path.split('/')]
    leading, tail_part = segments[:-1], segments[-1]

    # Resolve '..' against a stack of the non-blank leading elements;
    # popping from an empty stack raises the documented IndexError.
    head_parts = []
    for seg in leading:
        if not seg:
            continue
        if seg == '..':
            head_parts.pop()
        else:
            head_parts.append(seg)

    # A trailing '..' also climbs one level and leaves no tail.
    if tail_part == '..':
        head_parts.pop()
        tail_part = ''
    return ('/' + '/'.join(head_parts), tail_part)
870
871
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The uid is looked up once and cached in the module-level ``nobody``.
    Returns -1 when the pwd module cannot be imported.  If there is no
    user called 'nobody', one more than the highest uid in the password
    database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            entry = pwd.getpwnam('nobody')
        except KeyError:
            nobody = 1 + max(rec[2] for rec in pwd.getpwall())
        else:
            nobody = entry[2]
    return nobody
888
889
def executable(path):
    """Test for executable file.

    Returns True when path can be stat'ed and any of its execute permission
    bits (user, group or other) is set; returns False otherwise, including
    when the stat call fails.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return bool(mode & 0o111)
897
898
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        dir is the matching entry of self.cgi_directories; rest is the
        remainder of the normalized path (sub-directories, script name and
        any extra path information) followed by '?' and the query string
        when the request carried one.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        followed by a '/'.

        """

        # Only the path component is normalized; a '/' or '..' inside the
        # query string must not influence which directory is matched.
        url_path, query_sep, query = self.path.partition('?')
        head, tail = _url_collapse_path_split(url_path)
        collapsed_path = head.rstrip('/') + '/' + tail

        # Look for the shortest leading directory that is a CGI directory,
        # so that scripts in sub-directories and URLs carrying extra path
        # information after the script name are recognized as well.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0 and
               collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep + 1)
        if dir_sep > 0:
            rest = collapsed_path[dir_sep + 1:]
            if query_sep:
                rest += '?' + query
            self.cgi_info = collapsed_path[:dir_sep], rest
            return True
        return False

    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        return os.path.splitext(path)[1].lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi() to
        locate the script, builds the CGI environment from the request,
        calls send_response(200, ...) and then runs the script: through
        os.fork()/os.execve() when have_fork is true, otherwise through
        subprocess.Popen.

        A 404 or 403 error is sent instead when the script does not exist,
        is not a plain file, or is neither a Python script nor executable.

        """
        cgi_dir, rest = self.cgi_info

        # Split off the query string at the first '?' before looking at
        # directories, so that characters in the query cannot be mistaken
        # for path separators.
        rest, _, query = rest.partition('?')

        # Descend through real sub-directories of the CGI directory.  The
        # walk uses the normalized path from cgi_info rather than the raw
        # request path so that the offsets below are consistent.
        path = cgi_dir + '/' + rest
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir) + 1)
            else:
                break

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            auth_parts = authorization.split()
            if len(auth_parts) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = auth_parts[0]
                if auth_parts[0].lower() == "basic":
                    try:
                        credentials = base64.decodebytes(
                            auth_parts[1].encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Split on the first colon only: the password part
                        # of "user:password" may itself contain colons.
                        user, colon, _password = credentials.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect every Accept header value through the message API, the
        # same way the Cookie headers are collected below.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it cannot be
                    # changed.
                    pass
                # Connect the script's stdin/stdout to the client socket.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catches everything: whatever goes wrong, the
                # child must report it and terminate without returning
                # into the server loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): unlike the fork branch, the raw query (without
            # '+' replaced by ' ') is used here; kept as it was.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: send no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1161
1162
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).  The given protocol string is stored on HandlerClass as
    its protocol_version before the server is created.  The server runs
    until interrupted from the keyboard, at which point it is closed and
    the process exits with status 0.

    """

    port = int(sys.argv[1]) if sys.argv[1:] else 8000

    HandlerClass.protocol_version = protocol
    # An empty host string is passed as the address to bind.
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001189
# When run as a script, start the test server with the simple
# file-serving request handler.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)