blob: 27a9e0110c6b19fb1de697122eeaf3ff5edbf0c7 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

47
Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.
58
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
import errno
import mimetools
import socket

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000076
# default ports used when a connection is created without an explicit port
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse fields until begin() has parsed them
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
86
87
class HTTPResponse:
    """One HTTP response read from a socket.

    begin() parses the status line and headers and works out how the body
    is framed (chunked, Content-Length, or delimited by connection close).
    read() then returns body data and closes the response once the whole
    body has been consumed, which is what isclosed() reports.
    """

    def __init__(self, sock, debuglevel=0):
        """Wrap `sock` (anything with makefile()) for reading.

        `debuglevel` > 0 makes begin() print the status line and headers.
        """
        # bufsize 0: ask for an unbuffered file object
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN     # HTTP-Version
        self.status = _UNKNOWN      # Status-Code
        self.reason = _UNKNOWN      # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def begin(self):
        """Read the status line and headers; decide how the body is framed.

        Calling it again after the headers were read is a no-op.

        Raises BadStatusLine when the first token does not start with
        'HTTP/' or the status code is not an integer, UnknownProtocol for
        an unsupported HTTP version, and UnknownTransferEncoding for any
        Transfer-Encoding other than "chunked".
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: " + repr(line))
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # fewer than two tokens: treat as an HTTP/0.9 reply
                version = "HTTP/0.9"
                status = "200"
                reason = ""
        if version[:5] != 'HTTP/':
            self.close()
            raise BadStatusLine(line)

        # a non-numeric status code is a malformed status line, not a
        # ValueError for the caller to puzzle over
        try:
            status = int(status)
        except ValueError:
            self.close()
            raise BadStatusLine(line)
        self.status = status
        self.reason = reason.strip()

        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # no headers at all: the body is not chunked, has no known
            # length and ends when the connection closes. These must be
            # set explicitly because the _UNKNOWN placeholder is truthy
            # and would send read() down the chunked path.
            self.chunked = 0
            self.chunk_left = None
            self.length = None
            self.will_close = 1
            self.msg = mimetools.Message(StringIO())
            return

        self.msg = mimetools.Message(self.fp, 0)
        if self.debuglevel > 0:
            import sys
            for hdr in self.msg.headers:
                # write the header text as-is, without adding a newline
                sys.stdout.write("header: " + hdr)

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc:
            if tr_enc.lower() != 'chunked':
                raise UnknownTransferEncoding()
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        # a "Connection: close" will always close the connection. without
        # it, HTTP/1.1 stays open, and anything else stays open IFF we see
        # a Keep-Alive header.
        closes_by_default = (self.version != 11 and
                             not self.msg.getheader('keep-alive'))
        conn = self.msg.getheader('connection')
        if conn and conn.lower().find('close') != -1:
            self.will_close = 1
        else:
            self.will_close = closes_by_default

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None

        # does the body have a fixed length? (of zero)
        if (status == 204 or            # No Content
                status == 304 or        # Not Modified
                100 <= status < 200):   # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
                not self.chunked and
                self.length is None):
            self.will_close = 1

    def close(self):
        """Close the underlying file object, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return true once close() has run (the file object is gone)."""
        # NOTE: a response delimited by connection close (will_close true,
        #       length None) is only closed by an unbounded read() or by a
        #       bounded read() that hits end-of-file, so a caller who stops
        #       exactly at the last byte never triggers close().
        return self.fp is None

    def read(self, amt=None):
        """Return up to `amt` bytes of the body, or all of it if amt is None.

        Returns '' once the response is closed. The response closes itself
        when the body has been fully consumed.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        if not s and amt:
            # end-of-file: nothing more will ever arrive
            self.close()
        elif self.length is not None:
            # account for what was actually delivered, and finish the
            # response at the end of the body so isclosed() becomes true
            self.length = self.length - len(s)
            if self.length <= 0:
                self.close()

        return s

    def _read_chunked(self, amt):
        """Body of read() for "Transfer-Encoding: chunked" responses.

        self.chunk_left carries the unread remainder of the current chunk
        between calls (None means "a chunk-size line comes next").
        """
        chunk_left = self.chunk_left
        pieces = []
        while 1:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # no usable chunk size (e.g. end-of-file): framing is
                    # lost, so give up on this response
                    self.close()
                    raise IncompleteRead(''.join(pieces))
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt = amt - chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while 1:
            line = self.fp.readline()
            # an empty string is end-of-file: stop instead of spinning
            if not line or line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(pieces)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        pieces = []
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(''.join(pieces))
            pieces.append(chunk)
            amt = amt - len(chunk)
        return ''.join(pieces)

    def getheader(self, name, default=None):
        """Return response header `name`, or `default` if it is absent.

        Raises ResponseNotReady if begin() has not read the headers yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000312
313
class HTTPConnection:
    """Client side of one HTTP/1.1 connection.

    Drives the request state machine described in the module docstring:
    putrequest() -> putheader()* -> endheaders() -> getresponse(), or
    request() to do the sending steps in one call.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0

    def __init__(self, host, port=None):
        """Remember `host`/`port`; no network activity happens here.

        With port=None, a "host:port" string supplies the port, otherwise
        default_port is used.
        """
        self.sock = None
        self.__response = None
        self.__state = _CS_IDLE

        self._set_hostport(host, port)

    def _set_hostport(self, host, port):
        """Store host and port, splitting "host:port" when port is None."""
        if port is None:
            i = host.find(':')
            if i >= 0:
                port = int(host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug level; values > 0 print connection traffic."""
        self.debuglevel = level

    def connect(self):
        """Connect to the host and port specified in __init__."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0:
            print("connect: (%s, %s)" % (self.host, self.port))
        try:
            sock.connect((self.host, self.port))
        except socket.error:
            # don't leave a dead socket in self.sock: send() would skip
            # the auto-open and keep failing on it
            sock.close()
            raise
        self.sock = sock

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def _broken_pipe(self, err):
        """Tell whether socket error `err` is a broken pipe (EPIPE)."""
        return bool(err.args) and err.args[0] == errno.EPIPE

    def send(self, str):
        """Send `str' to the server.

        Opens the connection first when auto_open is set; otherwise raises
        NotConnected. A broken pipe closes the connection (so the next
        send reconnects) and the socket error is re-raised.
        """
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: " + repr(str))
        data = str
        try:
            # sock.send() may accept only part of the data; keep going
            # until everything has been handed over
            while data:
                sent = self.sock.send(data)
                if not sent:
                    # no byte count reported: nothing more we can do here
                    break
                data = data[sent:]
        except socket.error as v:
            if self._broken_pipe(v):
                self.close()
            raise

    def putrequest(self, method, url):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.

        Raises CannotSendRequest unless the connection is idle.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        if not url:
            url = '/'
        request_line = '%s %s %s\r\n' % (method, url, self._http_vsn_str)

        try:
            self.send(request_line)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self._broken_pipe(v) or not self.auto_open:
                raise
            # send() closed the connection, which put the state back to
            # Idle; re-enter Request-started so the headers that follow
            # are accepted
            self.__state = _CS_REQ_STARTED
            # try one more time (the socket was closed; this will reopen)
            self.send(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            # this header is issued *only* for HTTP/1.1 connections. more
            # specifically, this means it is only issued when the client uses
            # the new HTTPConnection() class. backwards-compat clients will
            # be using HTTP/1.0 and those clients may be issuing this header
            # themselves. we should NOT issue it twice; some web servers (such
            # as Apache) barf when they see two Host: headers

            # if we need a non-standard port, include it in the header.
            # "standard" is this class's own default_port, so subclasses
            # with a different default are handled correctly.
            if self.port == self.default_port:
                self.putheader('Host', self.host)
            else:
                self.putheader('Host', "%s:%s" % (self.host, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader outside the Request-started state.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        self.send('%s: %s\r\n' % (header, value))

    def endheaders(self):
        """Indicate that the last header line has been sent to the server.

        Raises CannotSendHeader outside the Request-started state.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT

        self.send('\r\n')

    def request(self, method, url, body=None, headers=None):
        """Send a complete request to the server.

        `headers` is a mapping of extra header names to values; `body`,
        when non-empty, is sent after the headers with a Content-Length.
        The whole request is retried once after a broken pipe when
        auto_open is set.
        """
        if headers is None:
            headers = {}

        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self._broken_pipe(v) or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Emit request line, headers and optional body in order."""
        self.putrequest(method, url)

        if body:
            self.putheader('Content-Length', str(len(body)))
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Raises ResponseNotReady unless a request has been fully sent and
        any earlier response has been completely read.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        # only pass debuglevel when it is set, so response classes that
        # take just the socket keep working
        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel)
        else:
            response = self.response_class(self.sock)

        response.begin()
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000557
558
class FakeSocket:
    """Give an SSL object enough of the socket interface for this module.

    send(), recv() and makefile() go through the SSL object; every other
    attribute is looked up on the wrapped plain socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        This method offers only partial support for the makefile
        interface of a real socket.  It only supports modes 'r' and
        'rb' and the bufsize argument is ignored.

        The returned object contains *all* of the file data: the SSL
        object is read until it raises socket.sslerror or returns an
        empty string.

        Raises UnimplementedFileMode for any other mode.
        """
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()

        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                break
            if not data:
                # an empty read also means end of data; without this
                # check the loop would never terminate
                break
            pieces.append(data)
        return StringIO(''.join(pieces))

    def send(self, stuff, flags = 0):
        """Write `stuff` through the SSL object; `flags` is ignored."""
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        """Read up to `len` bytes from the SSL object; `flags` is ignored."""
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # anything not defined here is served by the plain socket
        return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000592
Guido van Rossum23acc951994-02-21 16:36:04 +0000593
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Create an SSL connection object for `host`/`port`.

        The only keyword arguments accepted are key_file and cert_file;
        anything else raises IllegalKeywordArgument.
        """
        for key in x509.keys():
            if key not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0:
            print("connect: (%s, %s)" % (self.host, self.port))
        connected = 0
        try:
            sock.connect((self.host, self.port))
            # socket.ssl() is handed the underlying socket object when the
            # socket we created is a wrapper exposing one as `_sock`
            realsock = sock
            if hasattr(sock, "_sock"):
                realsock = sock._sock
            ssl = socket.ssl(realsock, self.key_file, self.cert_file)
            connected = 1
        finally:
            # release the socket if the TCP connect or SSL setup failed
            if not connected:
                sock.close()
        self.sock = FakeSocket(sock, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +0000625
626
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000627class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000628 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000629
Greg Steindd6eefb2000-07-18 09:09:48 +0000630 _http_vsn = 10
631 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000632
Greg Steindd6eefb2000-07-18 09:09:48 +0000633 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000634
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000635 _connection_class = HTTPConnection
636
Greg Steindd6eefb2000-07-18 09:09:48 +0000637 def __init__(self, host='', port=None, **x509):
638 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000639
Greg Steindd6eefb2000-07-18 09:09:48 +0000640 # some joker passed 0 explicitly, meaning default port
641 if port == 0:
642 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000643
Greg Steindd6eefb2000-07-18 09:09:48 +0000644 # Note that we may pass an empty string as the host; this will throw
645 # an error when we attempt to connect. Presumably, the client code
646 # will call connect before then, with a proper host.
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000647 self._conn = self._connection_class(host, port)
648 # set up delegation to flesh out interface
649 self.send = self._conn.send
650 self.putrequest = self._conn.putrequest
651 self.endheaders = self._conn.endheaders
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000652 self._conn._http_vsn = self._http_vsn
653 self._conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000654
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 # we never actually use these for anything, but we keep them here for
656 # compatibility with post-1.5.2 CVS.
657 self.key_file = x509.get('key_file')
658 self.cert_file = x509.get('cert_file')
Greg Stein5e0fa402000-06-26 08:28:01 +0000659
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000661
Greg Steindd6eefb2000-07-18 09:09:48 +0000662 def connect(self, host=None, port=None):
663 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000664
Greg Steindd6eefb2000-07-18 09:09:48 +0000665 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000666 self._conn._set_hostport(host, port)
667 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000668
Greg Steindd6eefb2000-07-18 09:09:48 +0000669 def set_debuglevel(self, debuglevel):
Jeremy Hylton30f86742000-09-18 22:50:38 +0000670 self._conn.set_debuglevel(debuglevel)
Greg Stein5e0fa402000-06-26 08:28:01 +0000671
Greg Steindd6eefb2000-07-18 09:09:48 +0000672 def getfile(self):
673 "Provide a getfile, since the superclass' does not use this concept."
674 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000675
Greg Steindd6eefb2000-07-18 09:09:48 +0000676 def putheader(self, header, *values):
677 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000678 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
Greg Steindd6eefb2000-07-18 09:09:48 +0000680 def getreply(self):
681 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000682
Greg Steindd6eefb2000-07-18 09:09:48 +0000683 Returns a tuple consisting of:
684 - server status code (e.g. '200' if all goes well)
685 - server "reason" corresponding to status code
686 - any RFC822 headers in the response from the server
687 """
688 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000689 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000690 except BadStatusLine, e:
691 ### hmm. if getresponse() ever closes the socket on a bad request,
692 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000693
Greg Steindd6eefb2000-07-18 09:09:48 +0000694 ### should we keep this behavior? do people use it?
695 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000696 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000697
Greg Steindd6eefb2000-07-18 09:09:48 +0000698 # close our socket -- we want to restart after any protocol error
699 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000700
Greg Steindd6eefb2000-07-18 09:09:48 +0000701 self.headers = None
702 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000703
Greg Steindd6eefb2000-07-18 09:09:48 +0000704 self.headers = response.msg
705 self.file = response.fp
706 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000707
def close(self):
    "Close the wrapped connection, then drop our reference to the reply file."
    conn = self._conn
    conn.close()

    # self.file normally is response.fp, and closing that is the
    # superclass' job -- so only the reference is cleared here.
    ### messy: after a status of -1 the file belongs to us.  it is not
    ### closed explicitly either; letting go of the last reference is
    ### what ends up closing it.
    self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000717
# HTTPS is only defined when this build's socket module exposes 'ssl'.
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.

        Everything is inherited from HTTP; the only difference is the
        connection class named below.
        """

        # Overrides the class attribute inherited from HTTP.
        # NOTE(review): presumably HTTP.__init__ instantiates this to
        # build self._conn -- that code is outside this section; confirm.
        _connection_class = HTTPSConnection
Tim Peters5ceadc82001-01-13 19:16:21 +0000728
Greg Stein5e0fa402000-06-26 08:28:01 +0000729
class HTTPException(Exception):
    "Root of all exception classes defined by this module."

class NotConnected(HTTPException):
    "HTTPException subclass carrying no extra state."

class UnknownProtocol(HTTPException):
    "Keeps its constructor argument on the 'version' attribute."
    def __init__(self, version):
        # Initialise the base class as well, so the value also appears
        # in self.args and in str() of the exception.
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    "HTTPException subclass carrying no extra state."

class IllegalKeywordArgument(HTTPException):
    "HTTPException subclass carrying no extra state."

class UnimplementedFileMode(HTTPException):
    "HTTPException subclass carrying no extra state."

class IncompleteRead(HTTPException):
    "Keeps its constructor argument on the 'partial' attribute."
    def __init__(self, partial):
        # Initialise the base class as well, so the value also appears
        # in self.args and in str() of the exception.
        HTTPException.__init__(self, partial)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady.

    NOTE(review): presumably raised when the connection state machine
    described in the module docstring is violated -- the raise sites are
    outside this section; confirm.
    """

class CannotSendRequest(ImproperConnectionState):
    "ImproperConnectionState subclass carrying no extra state."

class CannotSendHeader(ImproperConnectionState):
    "ImproperConnectionState subclass carrying no extra state."

class ResponseNotReady(ImproperConnectionState):
    "ImproperConnectionState subclass carrying no extra state."

class BadStatusLine(HTTPException):
    "Keeps its constructor argument on the 'line' attribute."
    def __init__(self, line):
        # Initialise the base class as well, so the value also appears
        # in self.args and in str() of the exception.
        HTTPException.__init__(self, line)
        self.line = line

# for backwards compatibility: 'error' is an alias of HTTPException,
# so "except error:" catches every exception class defined above.
error = HTTPException
771
772
773#
774# snarfed from httplib.py for now...
775#
Guido van Rossum23acc951994-02-21 16:36:04 +0000776def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000777 """Test this module.
778
779 The test consists of retrieving and displaying the Python
780 home page, along with the error code and error string returned
781 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000782 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000783
Guido van Rossum41999c11997-12-09 00:12:23 +0000784 import sys
785 import getopt
786 opts, args = getopt.getopt(sys.argv[1:], 'd')
787 dl = 0
788 for o, a in opts:
789 if o == '-d': dl = dl + 1
790 host = 'www.python.org'
791 selector = '/'
792 if args[0:]: host = args[0]
793 if args[1:]: selector = args[1]
794 h = HTTP()
795 h.set_debuglevel(dl)
796 h.connect(host)
797 h.putrequest('GET', selector)
798 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000799 status, reason, headers = h.getreply()
800 print 'status =', status
801 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000802 print
803 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000804 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +0000805 print
806 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000807
808 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000809 host = 'sourceforge.net'
Greg Steindd6eefb2000-07-18 09:09:48 +0000810 hs = HTTPS()
811 hs.connect(host)
812 hs.putrequest('GET', selector)
813 hs.endheaders()
814 status, reason, headers = hs.getreply()
815 print 'status =', status
816 print 'reason =', reason
817 print
818 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000819 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000820 print
821 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000822
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000823
Guido van Rossum23acc951994-02-21 16:36:04 +0000824if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +0000825 test()