blob: 2f17938feb245b2ea0c1d4457adb09a31c847a4d [file] [log] [blame]
"""HTTP server base class.

Note: the classes in this module don't implement any HTTP request
method; see SimpleHTTPServer for simple implementations of GET and HEAD,
and CGIHTTPServer for POST (including CGI scripts).

Contents:

- HTTPServer: TCP server that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- send server version
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
- are request names really case sensitive?

"""
21
22
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
31
32
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
62
63
# Version of this module.  BaseHTTPRequestHandler.server_version is built
# from it, so it ends up in the Server header of every response.
__version__ = "0.2"

# Public names of this module (what a star-import makes available).
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000067
68import sys
69import time
70import socket # For gethostbyaddr()
71import string
Guido van Rossume7e578f1995-08-04 04:00:20 +000072import mimetools
73import SocketServer
74
# Default error message template.  BaseHTTPRequestHandler.send_error()
# fills it in with the % operator from a dictionary that has the keys
# 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
87
88
class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers the host name and port it is bound to.

    After server_bind() has run, the fully qualified host name is
    available as self.server_name and the port number as
    self.server_port.

    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items of the socket name are the host and
        # the port.  An IPv6 socket reports a longer tuple, so a plain
        # two-way unpack would raise ValueError there.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
Guido van Rossume7e578f1995-08-04 04:00:20 +000099
100
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line; requestline is the request line without its line terminator;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  The request line stripped of its line terminator
        is kept in self.requestline.

        Return value is 1 for success, 0 for failure; on failure, an
        error is sent back.

        """
        # Until a valid version has been parsed, replies (including error
        # replies) are sent in the header-less HTTP/0.9 style.
        self.request_version = version = "HTTP/0.9" # Default
        requestline = self.raw_requestline
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return 0
        elif len(words) == 2:
            # No version given: an HTTP/0.9 request, which only knows GET.
            command, path = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)" %
                                repr(command))
                return 0
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return 0
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        return 1

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """

        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # The client closed the connection without sending anything;
            # there is nobody left to send an error reply to.
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501,
                            "Unsupported method (%s)" % repr(self.command))
            return
        method = getattr(self, mname)
        method()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The message
        is HTML-quoted before it is put into the page, because it
        usually echoes text received from the client.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.end_headers()
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': self._quote_html("%s" % (message,)),
                          'explain': explain})

    def _quote_html(self, text):
        """Return text with the HTML special characters &, < and > quoted."""
        # "&" has to be replaced first, otherwise the entities produced
        # for "<" and ">" would be quoted a second time.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        return text.replace(">", "&gt;")

    # Template of the error page and the Content-Type sent with it.
    # Override both together if the error page is not HTML.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = "text/html"

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message registered for the code
        in self.responses is used (or an empty string for an unknown
        code).  Nothing is written for an HTTP/0.9 request.

        """
        self.log_request(code)
        if message is None:
            message = self.responses.get(code, ('',))[0]
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday number of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month number of a time tuple (1..12); hence the
    # placeholder in slot 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version looks up the fully qualified host name of the
        client with socket.getfqdn().

        """

        # Slice instead of unpacking the whole tuple: an IPv6 client
        # address has more than two items.
        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support; it is written at the
    # start of every response line.  Don't override unless you know what
    # you're doing.  (Incoming requests are only checked for a version
    # string that starts with "HTTP/".)
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Bad gateway',
              'Invalid responses from another server/proxy'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
Guido van Rossume7e578f1995-08-04 04:00:20 +0000459
460
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer):
    """Run a server built from the given classes until interrupted.

    The server listens on all interfaces, on the port given as the
    first command line argument or on port 8000 if there is none.

    """

    port = 8000
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    httpd = ServerClass(('', port), HandlerClass)

    banner = " ".join(["Serving HTTP on port", str(port), "..."])
    sys.stdout.write(banner + "\n")
    httpd.serve_forever()
480
481
# Start the test server when this file is run as a script.
if __name__ == '__main__':
    test()