"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- send server version
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
- are request names really case sensitive?

"""
21
22
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
31
32
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
62
63
# Version of this module; BaseHTTPRequestHandler appends it to
# "BaseHTTP/" to build its default server_version string.
__version__ = "0.2"
65
66
67import sys
68import time
69import socket # For gethostbyaddr()
70import string
Guido van Rossume7e578f1995-08-04 04:00:20 +000071import mimetools
72import SocketServer
73
# Default error message.
#
# This is the template BaseHTTPRequestHandler uses as its
# error_message_format.  send_error() fills it in with the % operator
# from a dictionary holding the keys 'code' (the numeric status code),
# 'message' (the short or caller-supplied message) and 'explain' (the
# long explanation from the responses table).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
86
87
class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers the name and port it is bound to.

    After server_bind() has run, two extra instance variables exist:

    - server_name is the host name of the bound address, preferring a
      name that contains a dot when the resolver offers one;

    - server_port is the port number of the bound address.

    """

    def server_bind(self):
        """Override server_bind to store the server name.

        Binds the socket via SocketServer.TCPServer.server_bind() and
        then records self.server_name and self.server_port.  If the
        bound address cannot be resolved to a name, the address (or
        the local host name, for a wildcard bind) is used as is rather
        than letting the lookup failure abort server start-up.

        """
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items are the (host, port) pair; some
        # address families return longer tuples from getsockname().
        host, port = self.socket.getsockname()[:2]
        if not host or host == '0.0.0.0':
            # Bound to the wildcard address: describe ourselves by
            # the machine's own host name instead.
            host = socket.gethostname()
        try:
            hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
        except socket.error:
            # No (reverse) DNS entry; same best-effort fallback that
            # BaseHTTPRequestHandler.address_string() uses.
            hostname = host
        else:
            if '.' not in hostname:
                # The primary name is unqualified; take the first
                # alias that looks fully qualified, if there is one.
                for alias in hostnames:
                    if '.' in alias:
                        hostname = alias
                        break
        self.server_name = hostname
        self.server_port = port
Guido van Rossume7e578f1995-08-04 04:00:20 +0000104
105
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + string.split(sys.version)[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        Reads and parses the request line, stores the pieces in
        self.command, self.path and self.request_version, parses the
        headers into self.headers and calls the matching do_<command>
        method.  A malformed request line gets a 400 error reply; a
        command without a do_<command> method gets a 501 error reply.

        """

        self.raw_requestline = self.rfile.readline()
        # The default must be in place before any send_error() call
        # below: the send_* methods look at request_version to decide
        # whether a status line and headers may be written at all.
        self.request_version = version = "HTTP/0.9" # Default
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = string.split(requestline)
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return
        elif len(words) == 2:
            # No version given: an HTTP/0.9 request, which only knows GET.
            [command, path] = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)"
                                % repr(command))
                return
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        mname = 'do_' + command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%s)" % repr(command))
            return
        method = getattr(self, mname)
        method()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        The message is HTML-quoted before it is placed in the page,
        because it usually echoes text taken from the client's request.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        # The body written below is HTML; say so, as the class
        # docstring asks of every reply that carries data.
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': self._quote_html(message),
                          'explain': explain})

    def _quote_html(self, text):
        """Return TEXT with the HTML special characters &, < and > quoted."""
        # Callers may pass any object as the message (it used to be
        # formatted with %s only), so turn it into a string first.
        text = "%s" % (text,)
        # '&' must be handled first, or the entities produced for '<'
        # and '>' would themselves get quoted again.
        text = string.replace(text, '&', '&amp;')
        text = string.replace(text, '<', '&lt;')
        text = string.replace(text, '>', '&gt;')
        return text

    # Template for the body of an error reply; formatted with a
    # dictionary that has the keys 'code', 'message' and 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If no message is given, the short message for the code from
        the responses table is used (or an empty string for an
        unknown code).  Nothing is written for an HTTP/0.9 request,
        but the request is still logged.

        """
        self.log_request(code)
        if message is None:
            if self.responses.has_key(code):
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header (not for HTTP/0.9, which has no headers)."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        Arguments are the response code and the size of the reply;
        both default to '-', meaning "not available".

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        apply(self.log_message, args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.  The message is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header.

        The time is expressed in GMT.

        """
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging.

        The time is expressed in the machine's local time zone.

        """
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)
        return s

    # Day names indexed by the weekday field of a time tuple.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Month names indexed by the month field of a time tuple, which
    # starts at 1 -- hence the placeholder in slot 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version looks up the full hostname using gethostbyaddr(),
        and tries to find a name that contains at least one dot.
        If the lookup fails, the raw client host is returned.

        """

        (host, port) = self.client_address
        try:
            name, names, addresses = socket.gethostbyaddr(host)
        except socket.error:
            return host
        # Consider the primary name before any of its aliases.
        names.insert(0, name)
        for name in names:
            if '.' in name: return name
        return names[0]


    # Essentially static class variables

    # The version of the HTTP protocol we support; this is the string
    # written at the start of every response line.
    # Don't override unless you know what you're doing.  (Incoming
    # requests are not compared against it: handle() accepts any
    # version that starts with "HTTP/".)
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        503: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
Guido van Rossume7e578f1995-08-04 04:00:20 +0000457
458
459def test(HandlerClass = BaseHTTPRequestHandler,
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000460 ServerClass = HTTPServer):
Guido van Rossume7e578f1995-08-04 04:00:20 +0000461 """Test the HTTP request handler class.
462
463 This runs an HTTP server on port 8000 (or the first command line
464 argument).
465
466 """
467
468 if sys.argv[1:]:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000469 port = string.atoi(sys.argv[1])
Guido van Rossume7e578f1995-08-04 04:00:20 +0000470 else:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000471 port = 8000
Guido van Rossume7e578f1995-08-04 04:00:20 +0000472 server_address = ('', port)
473
474 httpd = ServerClass(server_address, HandlerClass)
475
476 print "Serving HTTP on port", port, "..."
477 httpd.serve_forever()
478
479
# Start the demonstration server when this file is run as a script
# (and not when it is imported as a module).
if __name__ == '__main__':
    test()