blob: 6aacbbd2f68a42c18be6abcd9f8f3485880980b6 [file] [log] [blame]
"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET, HEAD and POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
33
34
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module; it is embedded in the Server header sent by the
# request handlers below.
__version__ = "0.6"

# Public API of this module.  All request handler classes defined in this
# file are listed so that ``from http.server import *`` exposes them too.
__all__ = [
    "HTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
86
Georg Brandl1f7fffb2010-10-15 15:57:45 +000087import html
Barry Warsaw820c1202008-06-12 04:06:45 +000088import email.message
89import email.parser
Jeremy Hylton914ab452009-03-27 17:16:06 +000090import http.client
91import io
92import mimetypes
93import os
94import posixpath
95import select
96import shutil
97import socket # For gethostbyaddr()
98import socketserver
99import sys
100import time
101import urllib.parse
Senthil Kumaran42713722010-10-03 17:55:45 +0000102import copy
Georg Brandl24420152008-05-26 16:32:26 +0000103
# Default error message template.
# It is filled in with a mapping providing 'code' (int), 'message' and
# 'explain' (strings).  The <html> start tag is included so that it pairs
# with the closing </html> tag at the end of the document.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
122
def _quote_html(html):
    """Return *html* with '&', '<' and '>' replaced by HTML entities."""
    escaped = html
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the other two entities would be escaped a second time.
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(char, entity)
    return escaped
125
class HTTPServer(socketserver.TCPServer):
    """TCP server that additionally records its own host name and port."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        host, port = bound_address[0], bound_address[1]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
136
137
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for an empty request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        # Strip exactly one line terminator (CRLF preferred, bare LF accepted).
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # HTTP/0.9 style request: no version, only GET is allowed.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong:
            self.send_error(400, "Line too long")
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(100)
        # An interim (1xx) response consists of the status line and
        # optional headers and must be terminated by an empty line
        # (RFC 2616 section 10.1); without it the client cannot tell where
        # the interim response ends.
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so that an over-long request
            # line can be detected and answered with 414.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # EOF: the peer closed the connection.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # HEAD requests and 1xx/204/304 responses carry no message body.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        The status line is written directly to wfile; nothing is written
        for HTTP/0.9 requests, which have no status line.  The message
        defaults to the short text registered for the code in
        self.responses, or the empty string for unknown codes.

        """
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write(("%s %d %s\r\n" %
                              (self.protocol_version, code, message)).encode('latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header.

        The header is only buffered here; end_headers() writes the buffer
        to wfile.  A Connection header also updates self.close_connection.

        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by send_header(); fall back to an
            # empty one so that a response without any header still gets
            # its terminating blank line instead of an AttributeError.
            buffered = getattr(self, '_headers_buffer', [])
            buffered.append(b"\r\n")
            self.wfile.write(b"".join(buffered))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp is given, it is formatted instead of the current
        time.  The result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by tm_wday as returned by the time module (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the 1-based month number; index 0 is a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the name it reports; the client port is not included.

        """

        host = self.client_address[0]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
641
642
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the client disconnects while the
            # body is being copied.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file object in this case, so it
            # has to be closed here to avoid leaking the descriptor.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib.parse.unquote(self.path))
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        r.append("<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a>\n'
                    % (urllib.parse.quote(linkname), html.escape(displayname)))
        r.append("</ul>\n<hr>\n</body>\n</html>\n")
        enc = sys.getfilesystemencoding()
        encoded = ''.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        # The result is always rooted at the current working directory.
        path = os.getcwd()
        for word in words:
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that this the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then case-insensitively.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
827
828
829# Utilities for CGIHTTPRequestHandler
830
Benjamin Petersonad71f0f2009-04-11 20:12:10 +0000831# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Normalise a URL path and split it at its final '/'.

    Redundant '/' separators and '.' elements are dropped and '..'
    references are collapsed, akin to RFC-2396 5.2 step 6 for relative
    paths.  URL path semantics are used rather than those of the local
    operating system.

    Returns: A tuple of (head, tail) where tail is everything after the final /
    and head is everything before it.  Head will always start with a '/' and,
    if it contains anything else, never have a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # A '.' element is equivalent to an empty one.
    segments = ['' if segment == '.' else segment
                for segment in path.split('/')]
    # str.split() always yields at least one element, so a tail exists.
    *leading, tail_part = segments
    head_parts = []
    # Blank leading elements are dropped before the '..' are consumed.
    for segment in leading:
        if not segment:
            continue
        if segment == '..':
            head_parts.pop()
        else:
            head_parts.append(segment)
    # A trailing '..' removes one more directory and leaves no tail.
    if tail_part == '..':
        head_parts.pop()
        tail_part = ''
    return ('/' + '/'.join(head_parts), tail_part)
869
870
# Cached uid of the user "nobody"; filled in by nobody_uid() on first use.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the
    uid is looked up once and cached in the module-level ``nobody``;
    if no user named "nobody" exists, one more than the highest uid in
    the password database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
887
888
def executable(path):
    """Test for executable file.

    Returns True when any execute permission bit (owner, group or other)
    is set in the mode reported by os.stat(path), and False when none is
    set or when the path cannot be stat'ed.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return bool(mode & 0o111)
896
897
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Runs the CGI script when the request path designates one,
        otherwise defers to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """

        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Must be called after is_cgi() has returned True, since it relies
        on the cgi_info attribute set there.  Locates the script named by
        the request path, builds the CGI environment from the request,
        sends a 200 status line and then runs the script: through
        fork()/execve() when os.fork exists, through subprocess.Popen
        otherwise.  A 404 or 403 error is sent instead when the script
        is missing, is not a plain file, or is neither a Python script
        nor executable.
        """
        path = self.path
        cgi_dir, rest = self.cgi_info

        # Extend the CGI directory over any further leading path
        # components that are real directories on disk.
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        credentials = authorization[1].encode('ascii')
                        credentials = base64.decodebytes(
                            credentials).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Split on the first ':' only -- the password part
                        # may itself contain colons.
                        user, sep, _password = credentials.partition(':')
                        if sep:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect the values of every Accept header; get_all() works on
        # the parsed header values, the same way cookies are read below.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is handed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if it can't change.
                    pass
                # Wire the connection to the script's stdin and stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server loop, so report and exit.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE: unlike the fork branch, the raw query (with '+' not
            # replaced by spaces) is what gets passed here.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: read no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1160
1161
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run a server built from the given handler and server classes.

    The server listens on all interfaces on port 8000, or on the port
    given as the first command line argument, until interrupted from the
    keyboard; it is then closed and the process exits with status 0.

    """

    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
1187 sys.exit(0)
Georg Brandl24420152008-05-26 16:32:26 +00001188
# When executed as a script, run the test server with
# SimpleHTTPRequestHandler as the request handler.
if __name__ == '__main__':
    test(HandlerClass=SimpleHTTPRequestHandler)