"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).

Contents:

- HTTPServer: TCP server that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- send server version
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
- are request names really case sensitive?

"""
21
22
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt


# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
62
63
# Version of this module; it is appended to "BaseHTTP/" to build the
# default server_version string of BaseHTTPRequestHandler.
__version__ = "0.2"

# Names exported by "from BaseHTTPServer import *".
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]

import sys
import time
import socket  # For getfqdn()
import mimetools
import SocketServer

# Default error message.
# This is a %-format template filled in from a dictionary with the keys
# 'code' (the numeric status), 'message' (the detailed message) and
# 'explain' (the long explanation of the status code).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
86
87
class HTTPServer(SocketServer.TCPServer):

    """TCP server that records its own host name and port.

    After binding, the fully qualified host name is available as
    self.server_name and the port number as self.server_port.

    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items are the host and port: an AF_INET
        # socket returns (host, port), but an AF_INET6 socket returns
        # (host, port, flowinfo, scope_id) and a plain two-name unpack
        # of that would raise ValueError.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
Guido van Rossume7e578f1995-08-04 04:00:20 +000098
99
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the
        results are in self.command, self.path, self.request_version
        and self.headers.  The request line with its line terminator
        removed is also kept in self.requestline, which the logging
        methods use.

        Return value is 1 for success, 0 for failure; on failure, an
        error is sent back.

        """
        # Until a version has been parsed successfully the request is
        # treated as HTTP/0.9, so error replies sent from here carry
        # no status line and no headers.
        self.request_version = version = "HTTP/0.9"
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return 0
        elif len(words) == 2:
            # No version given: an HTTP/0.9 request, which only knows GET.
            command, path = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)"
                                % repr(command))
                return 0
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return 0
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        return 1

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # End of file: the client closed the connection without
            # sending a request, so there is nothing to parse and
            # nobody to send an error reply to.
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501,
                            "Unsupported method (%s)" % repr(self.command))
            return
        getattr(self, mname)()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The message
        is HTML-escaped before it is placed in that page.

        """
        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.end_headers()
        # The message frequently quotes text taken from the request
        # (request line, method name), so it must not be able to
        # inject markup into the error page.
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': self._quote_html(message),
                          'explain': explain})

    def _quote_html(self, text):
        """Return text with &, < and > replaced by HTML entities.

        Non-string arguments are first converted with "%s" formatting,
        which is what the error template would have done to them.

        """
        text = "%s" % (text,)
        # "&" must be replaced first so the entities produced by the
        # later replacements are not escaped a second time.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        return text.replace(">", "&gt;")

    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If no message is given, the short message registered for the
        code in self.responses is used (or an empty string for an
        unknown code).  Nothing is written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            try:
                message = self.responses[code][0]
            except KeyError:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header (nothing is sent for HTTP/0.9 requests)."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        return "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1-12); the None
    # entry pads index 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version looks up the fully qualified host name of the
        client with socket.getfqdn().

        """
        # Only the host part is needed; indexing (rather than unpacking
        # two names) also copes with address tuples longer than
        # (host, port), such as the 4-tuples used for IPv6.
        return socket.getfqdn(self.client_address[0])

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # This string starts the status line of every response sent by
    # send_response().  Don't override unless you know what you're
    # doing.
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    # The 5xx codes follow the numbering of the HTTP specifications:
    # 502 is a bad gateway response, 503 an overloaded or unavailable
    # service, and 504 a gateway timeout.
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway',
              'Invalid responses from another server/proxy'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
Guido van Rossume7e578f1995-08-04 04:00:20 +0000459
460
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer):
    """Run a demonstration HTTP server until interrupted.

    The server listens on all interfaces, on the port given as the
    first command line argument or on port 8000 when no argument is
    supplied.  HandlerClass handles the requests and ServerClass
    provides the server itself.

    """
    # Port 8000 unless the command line names another one.
    port = 8000
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    httpd = ServerClass(('', port), HandlerClass)

    # Report the address the socket is actually bound to.
    bound = httpd.socket.getsockname()
    sys.stdout.write("Serving HTTP on %s port %s ...\n"
                     % (bound[0], bound[1]))
    httpd.serve_forever()
481
482
# When run as a script (rather than imported), start the demonstration
# server with the default handler and server classes.
if __name__ == '__main__':
    test()