blob: ecb40d0d5b133cd68d765e93c96c9e4881a3d79e [file] [log] [blame]
"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).

Contents:

- HTTPServer: TCP server that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- send server version
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
- are request names really case sensitive?

"""
21
22
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt


# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
62
63
64__version__ = "0.2"
65
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000066__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000067
68import sys
69import time
70import socket # For gethostbyaddr()
Martin v. Löwisa43c2f82001-07-24 20:34:08 +000071import string
Guido van Rossume7e578f1995-08-04 04:00:20 +000072import mimetools
73import SocketServer
Martin v. Löwisa43c2f82001-07-24 20:34:08 +000074import re
Guido van Rossume7e578f1995-08-04 04:00:20 +000075
# Default HTML body for error responses.  It is filled in with a mapping
# that supplies the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = (
    "<head>\n"
    "<title>Error response</title>\n"
    "</head>\n"
    "<body>\n"
    "<h1>Error response</h1>\n"
    "<p>Error code %(code)d.\n"
    "<p>Message: %(message)s.\n"
    "<p>Error code explanation: %(code)s = %(explain)s.\n"
    "</body>\n"
)
88
89
class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers the host name and port it is bound to.

    After binding, the fully qualified host name is available as
    self.server_name and the port number as self.server_port.

    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items are host and port; some address
        # families (e.g. IPv6) return a longer tuple from getsockname().
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
Guido van Rossume7e578f1995-08-04 04:00:20 +0000100
101
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse the request line and the headers (internal).

        The raw request line is taken from self.raw_requestline; the
        results are stored in self.requestline, self.command,
        self.path, self.request_version and self.headers.

        Return value is 1 for success, 0 for failure; on failure, an
        error response has already been sent back.

        """
        self.request_version = version = "HTTP/0.9"  # Default
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return 0
        elif len(words) == 2:
            # No version field: an HTTP/0.9 request, which must be a GET.
            command, path = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)" % repr(command))
                return 0
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return 0
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        return 1

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # End of file before any request line arrived (the client
            # went away); there is nothing to parse or to answer.
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        method = getattr(self, 'do_' + self.command, None)
        if method is None:
            self.send_error(501,
                            "Unsupported method (%s)" % repr(self.command))
            return
        method()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The message
        is HTML-escaped before it is placed in that page.

        """
        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Content-Type', self.error_content_type)
        self.end_headers()
        # The message may echo text taken from the request line, so it
        # must not be copied into the page verbatim.
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': self._quote_html(message),
                          'explain': explain})

    def _quote_html(self, text):
        """Return text with the characters &, < and > HTML-escaped."""
        # Format first so that non-string messages behave as they would
        # under the %(message)s conversion of the template.
        text = "%s" % (text,)
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        return text.replace('>', '&gt;')

    # Template for the body of error responses; see send_error().
    error_message_format = DEFAULT_ERROR_MESSAGE

    # Content type announced for the body of error responses.  Override
    # it together with error_message_format if that is not HTML.
    error_content_type = "text/html"

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
            try:
                message = self.responses[code][0]
            except KeyError:
                message = ''
        # HTTP/0.9 replies consist of the data only: no status line.
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)

    def log_date_time_string(self):
        """Return the current (local) time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        return "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by month number 1..12; slot 0 is a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of self.client_address to
        socket.getfqdn() and returns the result.

        """
        # Only the host item is needed; indexing also works for address
        # tuples that carry more than (host, port).
        return socket.getfqdn(self.client_address[0])

    # Essentially static class variables

    # The version of the HTTP protocol we support; it is written at the
    # start of every status line.  Don't override unless you know what
    # you're doing.
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    # NOTE(review): the 502 and 503 phrases appear to differ from
    # RFC 1945 (502 Bad Gateway, 503 Service Unavailable); they are
    # left unchanged here for compatibility -- confirm before changing.
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        503: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
Guido van Rossume7e578f1995-08-04 04:00:20 +0000460
461
462def test(HandlerClass = BaseHTTPRequestHandler,
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000463 ServerClass = HTTPServer):
Guido van Rossume7e578f1995-08-04 04:00:20 +0000464 """Test the HTTP request handler class.
465
466 This runs an HTTP server on port 8000 (or the first command line
467 argument).
468
469 """
470
471 if sys.argv[1:]:
Eric S. Raymond5ff63d62001-02-09 05:38:46 +0000472 port = int(sys.argv[1])
Guido van Rossume7e578f1995-08-04 04:00:20 +0000473 else:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000474 port = 8000
Guido van Rossume7e578f1995-08-04 04:00:20 +0000475 server_address = ('', port)
476
477 httpd = ServerClass(server_address, HandlerClass)
478
Martin v. Löwisa43c2f82001-07-24 20:34:08 +0000479 sa = httpd.socket.getsockname()
480 print "Serving HTTP on", sa[0], "port", sa[1], "..."
Guido van Rossume7e578f1995-08-04 04:00:20 +0000481 httpd.serve_forever()
482
483
# Run the demo server when this file is executed as a script.
if __name__ == "__main__":
    test()