blob: 4d6d058fe163ac046da3d6b7a41538c804bdfc15 [file] [log] [blame]
"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- send server version
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
- are request names really case sensitive?

"""
21
22
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
31
32
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
62
63
# Version of this module.  BaseHTTPRequestHandler.server_version embeds
# it, so it ends up in the "Server" response header.
__version__ = "0.2"

# Public names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
Guido van Rossume7e578f1995-08-04 04:00:20 +000067
68import sys
69import time
70import socket # For gethostbyaddr()
Guido van Rossume7e578f1995-08-04 04:00:20 +000071import mimetools
72import SocketServer
73
# Default error message: the HTML template used for error response bodies.
# BaseHTTPRequestHandler.send_error() fills it in with a mapping that
# supplies the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
86
87
class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers its own host name and port.

    After binding, the server exposes two extra attributes:

    - server_name: the fully qualified name of the bound host, as
      returned by socket.getfqdn();

    - server_port: the port number the socket is bound to.

    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items are wanted: AF_INET addresses are
        # (host, port) pairs, but AF_INET6 addresses carry two more
        # fields (flow info, scope id) that would break a plain
        # two-name unpacking.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
Guido van Rossume7e578f1995-08-04 04:00:20 +000098
99
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line (requestline holds the line itself, without its line
    terminator);

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse a request (internal).

        The request line should be stored in self.raw_requestline; the
        results are in self.requestline, self.command, self.path,
        self.request_version and self.headers.

        Return value is 1 for success, 0 for failure; on failure, an
        error is sent back.

        """
        # Until a version is parsed the request counts as HTTP/0.9, so
        # an error reply sent from here carries no status line/headers.
        self.request_version = version = "HTTP/0.9"
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF line terminators.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return 0
        elif len(words) == 2:
            # No version given: only GET exists in HTTP/0.9.
            command, path = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)"
                                % repr(command))
                return 0
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return 0
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        return 1

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # Nothing was read, not even a line terminator: there is
            # no request to answer, so don't produce a 400 reply.
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501,
                            "Unsupported method (%s)" % repr(self.command))
            return
        method = getattr(self, mname)
        method()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML is
        announced with a Content-Type header, and the message is
        HTML-escaped before it is put into the page.

        """
        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.end_headers()
        # The message can echo parts of the request line, i.e. text
        # chosen by the client, so neutralize HTML markup in it.
        # "&" has to be replaced first.
        quoted = "%s" % (message,)
        quoted = quoted.replace("&", "&amp;")
        quoted = quoted.replace("<", "&lt;").replace(">", "&gt;")
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': quoted,
                          'explain': explain})

    # Template for the body written by send_error(); it is formatted
    # with the keys 'code', 'message' and 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message registered for the code
        in self.responses is used (or '' for an unknown code).

        """
        self.log_request(code)
        if message is None:
            message = self.responses.get(code, ('', ''))[0]
        # HTTP/0.9 replies consist of the data only.
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header (nothing is written for HTTP/0.9)."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple; the None placeholder
    # lets month numbers be used as indexes unchanged.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version looks up the fully qualified host name of the
        client with socket.getfqdn().

        """
        # Slice before unpacking: the address may have more than two
        # items (AF_INET6 addresses are 4-tuples).
        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support; it is the version
    # sent in the status line of every reply.  Don't override unless
    # you know what you're doing.  (parse_request() only checks that
    # the version of an incoming request starts with 'HTTP/'.)
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        503: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
Guido van Rossume7e578f1995-08-04 04:00:20 +0000458
459
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer):
    """Run a demo server built from the given handler and server classes.

    The server listens on all interfaces, on the port given as the
    first command line argument (8000 when there is none), announces
    the bound address on standard output and then serves requests
    forever.

    """
    port = 8000
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    httpd = ServerClass(('', port), HandlerClass)

    # Report what the socket is actually bound to.
    bound = httpd.socket.getsockname()
    sys.stdout.write("Serving HTTP on %s port %s ...\n"
                     % (bound[0], bound[1]))
    httpd.serve_forever()
480
481
# Start the demo server when this file is run as a script.
if __name__ == '__main__':
    test()