blob: 0688fa85437c1e5894c4ef6b04582b35bc80cb16 [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     \
      | response.read()      | putrequest()
      v                      v
    Idle                   Req-started-unread-response
                    _______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Guido van Rossum23acc951994-02-21 16:36:04 +000069import socket
70import string
Guido van Rossum65ab98c1995-08-07 20:13:02 +000071import mimetools
Guido van Rossum23acc951994-02-21 16:36:04 +000072
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000073try:
Greg Steindd6eefb2000-07-18 09:09:48 +000074 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000075except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000076 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000077
# default TCP ports for plain and SSL-wrapped HTTP
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes until begin() has parsed
# the Status-Line and headers
_UNKNOWN = 'UNKNOWN'

# connection states (the values of HTTPConnection.__state; see the table
# at the end of the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
87
88
89class HTTPResponse:
Jeremy Hylton30f86742000-09-18 22:50:38 +000090 def __init__(self, sock, debuglevel=0):
Greg Steindd6eefb2000-07-18 09:09:48 +000091 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +000092 self.debuglevel = debuglevel
Greg Stein5e0fa402000-06-26 08:28:01 +000093
Greg Steindd6eefb2000-07-18 09:09:48 +000094 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +000095
Greg Steindd6eefb2000-07-18 09:09:48 +000096 # from the Status-Line of the response
97 self.version = _UNKNOWN # HTTP-Version
98 self.status = _UNKNOWN # Status-Code
99 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000100
Greg Steindd6eefb2000-07-18 09:09:48 +0000101 self.chunked = _UNKNOWN # is "chunked" being used?
102 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
103 self.length = _UNKNOWN # number of bytes left in response
104 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000105
Greg Steindd6eefb2000-07-18 09:09:48 +0000106 def begin(self):
107 if self.msg is not None:
108 # we've already started reading the response
109 return
Greg Stein5e0fa402000-06-26 08:28:01 +0000110
Greg Stein5e0fa402000-06-26 08:28:01 +0000111 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000112 if self.debuglevel > 0:
113 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000114 try:
115 [version, status, reason] = string.split(line, None, 2)
116 except ValueError:
117 try:
118 [version, status] = string.split(line, None, 1)
119 reason = ""
120 except ValueError:
121 self.close()
122 raise BadStatusLine(line)
123 if version[:5] != 'HTTP/':
124 self.close()
125 raise BadStatusLine(line)
Greg Stein5e0fa402000-06-26 08:28:01 +0000126
Greg Steindd6eefb2000-07-18 09:09:48 +0000127 self.status = status = int(status)
128 self.reason = string.strip(reason)
Greg Stein5e0fa402000-06-26 08:28:01 +0000129
Greg Steindd6eefb2000-07-18 09:09:48 +0000130 if version == 'HTTP/1.0':
131 self.version = 10
132 elif version[:7] == 'HTTP/1.':
133 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
134 else:
135 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000136
Greg Steindd6eefb2000-07-18 09:09:48 +0000137 self.msg = mimetools.Message(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000138 if self.debuglevel > 0:
139 for hdr in self.msg.headers:
140 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000141
Greg Steindd6eefb2000-07-18 09:09:48 +0000142 # don't let the msg keep an fp
143 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000144
Greg Steindd6eefb2000-07-18 09:09:48 +0000145 # are we using the chunked-style of transfer encoding?
146 tr_enc = self.msg.getheader('transfer-encoding')
147 if tr_enc:
148 if string.lower(tr_enc) != 'chunked':
149 raise UnknownTransferEncoding()
150 self.chunked = 1
151 self.chunk_left = None
152 else:
153 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000154
Greg Steindd6eefb2000-07-18 09:09:48 +0000155 # will the connection close at the end of the response?
156 conn = self.msg.getheader('connection')
157 if conn:
158 conn = string.lower(conn)
159 # a "Connection: close" will always close the connection. if we
160 # don't see that and this is not HTTP/1.1, then the connection will
161 # close unless we see a Keep-Alive header.
162 self.will_close = string.find(conn, 'close') != -1 or \
163 ( self.version != 11 and \
164 not self.msg.getheader('keep-alive') )
165 else:
166 # for HTTP/1.1, the connection will always remain open
167 # otherwise, it will remain open IFF we see a Keep-Alive header
168 self.will_close = self.version != 11 and \
169 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000170
Greg Steindd6eefb2000-07-18 09:09:48 +0000171 # do we have a Content-Length?
172 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
173 length = self.msg.getheader('content-length')
174 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000175 try:
176 self.length = int(length)
177 except ValueError:
178 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000179 else:
180 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000181
Greg Steindd6eefb2000-07-18 09:09:48 +0000182 # does the body have a fixed length? (of zero)
183 if (status == 204 or # No Content
184 status == 304 or # Not Modified
185 100 <= status < 200): # 1xx codes
186 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000187
Greg Steindd6eefb2000-07-18 09:09:48 +0000188 # if the connection remains open, and we aren't using chunked, and
189 # a content-length was not provided, then assume that the connection
190 # WILL close.
191 if not self.will_close and \
192 not self.chunked and \
193 self.length is None:
194 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000195
Greg Steindd6eefb2000-07-18 09:09:48 +0000196 def close(self):
197 if self.fp:
198 self.fp.close()
199 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000200
Greg Steindd6eefb2000-07-18 09:09:48 +0000201 def isclosed(self):
202 # NOTE: it is possible that we will not ever call self.close(). This
203 # case occurs when will_close is TRUE, length is None, and we
204 # read up to the last byte, but NOT past it.
205 #
206 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
207 # called, meaning self.isclosed() is meaningful.
208 return self.fp is None
209
210 def read(self, amt=None):
211 if self.fp is None:
212 return ''
213
214 if self.chunked:
215 chunk_left = self.chunk_left
216 value = ''
217 while 1:
218 if chunk_left is None:
219 line = self.fp.readline()
220 i = string.find(line, ';')
221 if i >= 0:
222 line = line[:i] # strip chunk-extensions
223 chunk_left = string.atoi(line, 16)
224 if chunk_left == 0:
225 break
226 if amt is None:
227 value = value + self._safe_read(chunk_left)
228 elif amt < chunk_left:
229 value = value + self._safe_read(amt)
230 self.chunk_left = chunk_left - amt
231 return value
232 elif amt == chunk_left:
233 value = value + self._safe_read(amt)
234 self._safe_read(2) # toss the CRLF at the end of the chunk
235 self.chunk_left = None
236 return value
237 else:
238 value = value + self._safe_read(chunk_left)
239 amt = amt - chunk_left
240
241 # we read the whole chunk, get another
242 self._safe_read(2) # toss the CRLF at the end of the chunk
243 chunk_left = None
244
245 # read and discard trailer up to the CRLF terminator
246 ### note: we shouldn't have any trailers!
247 while 1:
248 line = self.fp.readline()
249 if line == '\r\n':
250 break
251
252 # we read everything; close the "file"
253 self.close()
254
255 return value
256
257 elif amt is None:
258 # unbounded read
259 if self.will_close:
260 s = self.fp.read()
261 else:
262 s = self._safe_read(self.length)
263 self.close() # we read everything
264 return s
265
266 if self.length is not None:
267 if amt > self.length:
268 # clip the read to the "end of response"
269 amt = self.length
270 self.length = self.length - amt
271
272 # we do not use _safe_read() here because this may be a .will_close
273 # connection, and the user is reading more bytes than will be provided
274 # (for example, reading in 1k chunks)
275 s = self.fp.read(amt)
276
Greg Steindd6eefb2000-07-18 09:09:48 +0000277 return s
278
279 def _safe_read(self, amt):
280 """Read the number of bytes requested, compensating for partial reads.
281
282 Normally, we have a blocking socket, but a read() can be interrupted
283 by a signal (resulting in a partial read).
284
285 Note that we cannot distinguish between EOF and an interrupt when zero
286 bytes have been read. IncompleteRead() will be raised in this
287 situation.
288
289 This function should be used when <amt> bytes "should" be present for
290 reading. If the bytes are truly not available (due to EOF), then the
291 IncompleteRead exception can be used to detect the problem.
292 """
293 s = ''
294 while amt > 0:
295 chunk = self.fp.read(amt)
296 if not chunk:
297 raise IncompleteRead(s)
298 s = s + chunk
299 amt = amt - len(chunk)
300 return s
301
302 def getheader(self, name, default=None):
303 if self.msg is None:
304 raise ResponseNotReady()
305 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000306
307
308class HTTPConnection:
309
Greg Steindd6eefb2000-07-18 09:09:48 +0000310 _http_vsn = 11
311 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000312
Greg Steindd6eefb2000-07-18 09:09:48 +0000313 response_class = HTTPResponse
314 default_port = HTTP_PORT
315 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000316 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000317
Greg Steindd6eefb2000-07-18 09:09:48 +0000318 def __init__(self, host, port=None):
319 self.sock = None
320 self.__response = None
321 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000322
Greg Steindd6eefb2000-07-18 09:09:48 +0000323 self._set_hostport(host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000324
Greg Steindd6eefb2000-07-18 09:09:48 +0000325 def _set_hostport(self, host, port):
326 if port is None:
327 i = string.find(host, ':')
328 if i >= 0:
329 port = int(host[i+1:])
330 host = host[:i]
331 else:
332 port = self.default_port
333 self.host = host
334 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000335
Jeremy Hylton30f86742000-09-18 22:50:38 +0000336 def set_debuglevel(self, level):
337 self.debuglevel = level
338
Greg Steindd6eefb2000-07-18 09:09:48 +0000339 def connect(self):
340 """Connect to the host and port specified in __init__."""
341 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000342 if self.debuglevel > 0:
343 print "connect: (%s, %s)" % (self.host, self.port)
Greg Steindd6eefb2000-07-18 09:09:48 +0000344 self.sock.connect((self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000345
Greg Steindd6eefb2000-07-18 09:09:48 +0000346 def close(self):
347 """Close the connection to the HTTP server."""
348 if self.sock:
349 self.sock.close() # close it manually... there may be other refs
350 self.sock = None
351 if self.__response:
352 self.__response.close()
353 self.__response = None
354 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000355
Greg Steindd6eefb2000-07-18 09:09:48 +0000356 def send(self, str):
357 """Send `str' to the server."""
358 if self.sock is None:
359 if self.auto_open:
360 self.connect()
361 else:
362 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000363
Greg Steindd6eefb2000-07-18 09:09:48 +0000364 # send the data to the server. if we get a broken pipe, then close
365 # the socket. we want to reconnect when somebody tries to send again.
366 #
367 # NOTE: we DO propagate the error, though, because we cannot simply
368 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000369 if self.debuglevel > 0:
370 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000371 try:
372 self.sock.send(str)
373 except socket.error, v:
374 if v[0] == 32: # Broken pipe
375 self.close()
376 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000377
Greg Steindd6eefb2000-07-18 09:09:48 +0000378 def putrequest(self, method, url):
379 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000380
Greg Steindd6eefb2000-07-18 09:09:48 +0000381 `method' specifies an HTTP request method, e.g. 'GET'.
382 `url' specifies the object being requested, e.g. '/index.html'.
383 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000384
Greg Steindd6eefb2000-07-18 09:09:48 +0000385 # check if a prior response has been completed
386 if self.__response and self.__response.isclosed():
387 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000388
Greg Steindd6eefb2000-07-18 09:09:48 +0000389 #
390 # in certain cases, we cannot issue another request on this connection.
391 # this occurs when:
392 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
393 # 2) a response to a previous request has signalled that it is going
394 # to close the connection upon completion.
395 # 3) the headers for the previous response have not been read, thus
396 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
397 #
398 # if there is no prior response, then we can request at will.
399 #
400 # if point (2) is true, then we will have passed the socket to the
401 # response (effectively meaning, "there is no prior response"), and
402 # will open a new one when a new request is made.
403 #
404 # Note: if a prior response exists, then we *can* start a new request.
405 # We are not allowed to begin fetching the response to this new
406 # request, however, until that prior response is complete.
407 #
408 if self.__state == _CS_IDLE:
409 self.__state = _CS_REQ_STARTED
410 else:
411 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000412
Greg Steindd6eefb2000-07-18 09:09:48 +0000413 if not url:
414 url = '/'
415 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000416
Greg Steindd6eefb2000-07-18 09:09:48 +0000417 try:
418 self.send(str)
419 except socket.error, v:
420 # trap 'Broken pipe' if we're allowed to automatically reconnect
421 if v[0] != 32 or not self.auto_open:
422 raise
423 # try one more time (the socket was closed; this will reopen)
424 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000425
Greg Steindd6eefb2000-07-18 09:09:48 +0000426 if self._http_vsn == 11:
427 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000428
Greg Steindd6eefb2000-07-18 09:09:48 +0000429 # this header is issued *only* for HTTP/1.1 connections. more
430 # specifically, this means it is only issued when the client uses
431 # the new HTTPConnection() class. backwards-compat clients will
432 # be using HTTP/1.0 and those clients may be issuing this header
433 # themselves. we should NOT issue it twice; some web servers (such
434 # as Apache) barf when they see two Host: headers
435 self.putheader('Host', self.host)
Greg Stein5e0fa402000-06-26 08:28:01 +0000436
Greg Steindd6eefb2000-07-18 09:09:48 +0000437 # note: we are assuming that clients will not attempt to set these
438 # headers since *this* library must deal with the
439 # consequences. this also means that when the supporting
440 # libraries are updated to recognize other forms, then this
441 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000442
Greg Steindd6eefb2000-07-18 09:09:48 +0000443 # we only want a Content-Encoding of "identity" since we don't
444 # support encodings such as x-gzip or x-deflate.
445 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000446
Greg Steindd6eefb2000-07-18 09:09:48 +0000447 # we can accept "chunked" Transfer-Encodings, but no others
448 # NOTE: no TE header implies *only* "chunked"
449 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000450
Greg Steindd6eefb2000-07-18 09:09:48 +0000451 # if TE is supplied in the header, then it must appear in a
452 # Connection header.
453 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000454
Greg Steindd6eefb2000-07-18 09:09:48 +0000455 else:
456 # For HTTP/1.0, the server will assume "not chunked"
457 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000458
Greg Steindd6eefb2000-07-18 09:09:48 +0000459 def putheader(self, header, value):
460 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000461
Greg Steindd6eefb2000-07-18 09:09:48 +0000462 For example: h.putheader('Accept', 'text/html')
463 """
464 if self.__state != _CS_REQ_STARTED:
465 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000466
Greg Steindd6eefb2000-07-18 09:09:48 +0000467 str = '%s: %s\r\n' % (header, value)
468 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000469
Greg Steindd6eefb2000-07-18 09:09:48 +0000470 def endheaders(self):
471 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000472
Greg Steindd6eefb2000-07-18 09:09:48 +0000473 if self.__state == _CS_REQ_STARTED:
474 self.__state = _CS_REQ_SENT
475 else:
476 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000477
Greg Steindd6eefb2000-07-18 09:09:48 +0000478 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000479
Greg Steindd6eefb2000-07-18 09:09:48 +0000480 def request(self, method, url, body=None, headers={}):
481 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000482
Greg Steindd6eefb2000-07-18 09:09:48 +0000483 try:
484 self._send_request(method, url, body, headers)
485 except socket.error, v:
486 # trap 'Broken pipe' if we're allowed to automatically reconnect
487 if v[0] != 32 or not self.auto_open:
488 raise
489 # try one more time
490 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000491
Greg Steindd6eefb2000-07-18 09:09:48 +0000492 def _send_request(self, method, url, body, headers):
493 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000494
Greg Steindd6eefb2000-07-18 09:09:48 +0000495 if body:
496 self.putheader('Content-Length', str(len(body)))
497 for hdr, value in headers.items():
498 self.putheader(hdr, value)
499 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000500
Greg Steindd6eefb2000-07-18 09:09:48 +0000501 if body:
502 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000503
Greg Steindd6eefb2000-07-18 09:09:48 +0000504 def getresponse(self):
505 "Get the response from the server."
Greg Stein5e0fa402000-06-26 08:28:01 +0000506
Greg Steindd6eefb2000-07-18 09:09:48 +0000507 # check if a prior response has been completed
508 if self.__response and self.__response.isclosed():
509 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000510
Greg Steindd6eefb2000-07-18 09:09:48 +0000511 #
512 # if a prior response exists, then it must be completed (otherwise, we
513 # cannot read this response's header to determine the connection-close
514 # behavior)
515 #
516 # note: if a prior response existed, but was connection-close, then the
517 # socket and response were made independent of this HTTPConnection
518 # object since a new request requires that we open a whole new
519 # connection
520 #
521 # this means the prior response had one of two states:
522 # 1) will_close: this connection was reset and the prior socket and
523 # response operate independently
524 # 2) persistent: the response was retained and we await its
525 # isclosed() status to become true.
526 #
527 if self.__state != _CS_REQ_SENT or self.__response:
528 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000529
Jeremy Hylton30f86742000-09-18 22:50:38 +0000530 if self.debuglevel > 0:
531 response = self.response_class(self.sock, self.debuglevel)
532 else:
533 response = self.response_class(self.sock)
Greg Stein5e0fa402000-06-26 08:28:01 +0000534
Greg Steindd6eefb2000-07-18 09:09:48 +0000535 response.begin()
536 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000537
Greg Steindd6eefb2000-07-18 09:09:48 +0000538 if response.will_close:
539 # this effectively passes the connection to the response
540 self.close()
541 else:
542 # remember this, so we can tell when it is complete
543 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000544
Greg Steindd6eefb2000-07-18 09:09:48 +0000545 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000546
547
class FakeSocket:
    """Socket look-alike that routes reads and writes through an SSL object.

    Only makefile(), send() and recv() are implemented here; every other
    attribute is looked up on the wrapped plain socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        This method offers only partial support for the makefile
        interface of a real socket.  It only supports modes 'r' and
        'rb' and the bufsize argument is ignored.

        The returned object contains *all* of the file data
        """
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()

        # drain the SSL object; it signals the end of the stream by
        # raising socket.sslerror
        pieces = []
        while 1:
            try:
                pieces.append(self.__ssl.read())
            except socket.sslerror:
                break
        return StringIO(string.join(pieces, ''))

    def send(self, stuff, flags = 0):
        "Write `stuff' to the SSL object; `flags' is ignored."
        written = self.__ssl.write(stuff)
        return written

    def recv(self, len = 1024, flags = 0):
        "Read up to `len' bytes from the SSL object; `flags' is ignored."
        data = self.__ssl.read(len)
        return data

    def __getattr__(self, attr):
        # anything not defined above is delegated to the real socket
        return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000581
Guido van Rossum23acc951994-02-21 16:36:04 +0000582
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Set up the connection; no socket is opened yet.

        The only keyword arguments accepted are key_file and cert_file;
        anything else raises IllegalKeywordArgument.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # open the plain TCP connection first, then layer SSL on top of it
        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        secure = socket.ssl(plain, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
Greg Stein5e0fa402000-06-26 08:28:01 +0000611
612
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000613class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000614 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000615
Greg Steindd6eefb2000-07-18 09:09:48 +0000616 _http_vsn = 10
617 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000618
Greg Steindd6eefb2000-07-18 09:09:48 +0000619 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000620
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000621 _connection_class = HTTPConnection
622
Greg Steindd6eefb2000-07-18 09:09:48 +0000623 def __init__(self, host='', port=None, **x509):
624 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000625
Greg Steindd6eefb2000-07-18 09:09:48 +0000626 # some joker passed 0 explicitly, meaning default port
627 if port == 0:
628 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000629
Greg Steindd6eefb2000-07-18 09:09:48 +0000630 # Note that we may pass an empty string as the host; this will throw
631 # an error when we attempt to connect. Presumably, the client code
632 # will call connect before then, with a proper host.
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000633 self._conn = self._connection_class(host, port)
634 # set up delegation to flesh out interface
635 self.send = self._conn.send
636 self.putrequest = self._conn.putrequest
637 self.endheaders = self._conn.endheaders
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000638 self._conn._http_vsn = self._http_vsn
639 self._conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000640
Greg Steindd6eefb2000-07-18 09:09:48 +0000641 # we never actually use these for anything, but we keep them here for
642 # compatibility with post-1.5.2 CVS.
643 self.key_file = x509.get('key_file')
644 self.cert_file = x509.get('cert_file')
Greg Stein5e0fa402000-06-26 08:28:01 +0000645
Greg Steindd6eefb2000-07-18 09:09:48 +0000646 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000647
Greg Steindd6eefb2000-07-18 09:09:48 +0000648 def connect(self, host=None, port=None):
649 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000650
Greg Steindd6eefb2000-07-18 09:09:48 +0000651 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000652 self._conn._set_hostport(host, port)
653 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000654
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 def set_debuglevel(self, debuglevel):
Jeremy Hylton30f86742000-09-18 22:50:38 +0000656 self._conn.set_debuglevel(debuglevel)
Greg Stein5e0fa402000-06-26 08:28:01 +0000657
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 def getfile(self):
659 "Provide a getfile, since the superclass' does not use this concept."
660 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000661
Greg Steindd6eefb2000-07-18 09:09:48 +0000662 def putheader(self, header, *values):
663 "The superclass allows only one value argument."
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000664 self._conn.putheader(header,
665 string.joinfields(values, '\r\n\t'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
Greg Steindd6eefb2000-07-18 09:09:48 +0000667 def getreply(self):
668 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000669
Greg Steindd6eefb2000-07-18 09:09:48 +0000670 Returns a tuple consisting of:
671 - server status code (e.g. '200' if all goes well)
672 - server "reason" corresponding to status code
673 - any RFC822 headers in the response from the server
674 """
675 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000676 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 except BadStatusLine, e:
678 ### hmm. if getresponse() ever closes the socket on a bad request,
679 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000680
Greg Steindd6eefb2000-07-18 09:09:48 +0000681 ### should we keep this behavior? do people use it?
682 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000683 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000684
Greg Steindd6eefb2000-07-18 09:09:48 +0000685 # close our socket -- we want to restart after any protocol error
686 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000687
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 self.headers = None
689 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000690
Greg Steindd6eefb2000-07-18 09:09:48 +0000691 self.headers = response.msg
692 self.file = response.fp
693 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000694
Greg Steindd6eefb2000-07-18 09:09:48 +0000695 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000696 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000697
Greg Steindd6eefb2000-07-18 09:09:48 +0000698 # note that self.file == response.fp, which gets closed by the
699 # superclass. just clear the object ref here.
700 ### hmm. messy. if status==-1, then self.file is owned by us.
701 ### well... we aren't explicitly closing, but losing this ref will
702 ### do it
703 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000704
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            """Like HTTP.__init__, but honour key_file and cert_file.

            The base initializer builds the connection from host and port
            only, so the key and certificate file names are copied onto
            the connection here; HTTPSConnection.connect() reads them
            from there when it sets up SSL.
            """
            HTTP.__init__(self, host, port, **x509)
            self._conn.key_file = self.key_file
            self._conn.cert_file = self.cert_file
715
Greg Stein5e0fa402000-06-26 08:28:01 +0000716
class HTTPException(Exception):
    "Base class for all exceptions defined by this module."
    pass

class NotConnected(HTTPException):
    "send() was called without a socket while auto_open is disabled."
    pass

class UnknownProtocol(HTTPException):
    "The response's HTTP-Version is not HTTP/1.x."
    def __init__(self, version):
        # hand the value to the base class as well, so it is carried in
        # the exception's args and not only in the named attribute
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    "The response uses a Transfer-Encoding other than 'chunked'."
    pass

class IllegalKeywordArgument(HTTPException):
    "An unsupported keyword argument was passed to HTTPSConnection."
    pass

class UnimplementedFileMode(HTTPException):
    "FakeSocket.makefile() was asked for a mode other than 'r' or 'rb'."
    pass

class IncompleteRead(HTTPException):
    "EOF was hit before the expected number of bytes could be read."
    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        # the data that was read before the stream ended
        self.partial = partial

class ImproperConnectionState(HTTPException):
    "A method was called in a state of the connection that forbids it."
    pass

class CannotSendRequest(ImproperConnectionState):
    "putrequest() was called while the connection was not idle."
    pass

class CannotSendHeader(ImproperConnectionState):
    "putheader()/endheaders() was called without a started request."
    pass

class ResponseNotReady(ImproperConnectionState):
    "A response (or its headers) was requested before it is available."
    pass

class BadStatusLine(HTTPException):
    "The first line of the response is not a valid Status-Line."
    def __init__(self, line):
        HTTPException.__init__(self, line)
        # the offending line, exactly as received
        self.line = line

# for backwards compatibility
error = HTTPException
758
759
760#
761# snarfed from httplib.py for now...
762#
Guido van Rossum23acc951994-02-21 16:36:04 +0000763def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000764 """Test this module.
765
766 The test consists of retrieving and displaying the Python
767 home page, along with the error code and error string returned
768 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000769 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000770
Guido van Rossum41999c11997-12-09 00:12:23 +0000771 import sys
772 import getopt
773 opts, args = getopt.getopt(sys.argv[1:], 'd')
774 dl = 0
775 for o, a in opts:
776 if o == '-d': dl = dl + 1
777 host = 'www.python.org'
778 selector = '/'
779 if args[0:]: host = args[0]
780 if args[1:]: selector = args[1]
781 h = HTTP()
782 h.set_debuglevel(dl)
783 h.connect(host)
784 h.putrequest('GET', selector)
785 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000786 status, reason, headers = h.getreply()
787 print 'status =', status
788 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000789 print
790 if headers:
791 for header in headers.headers: print string.strip(header)
792 print
793 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000794
795 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000796 host = 'sourceforge.net'
Greg Steindd6eefb2000-07-18 09:09:48 +0000797 hs = HTTPS()
798 hs.connect(host)
799 hs.putrequest('GET', selector)
800 hs.endheaders()
801 status, reason, headers = hs.getreply()
802 print 'status =', status
803 print 'reason =', reason
804 print
805 if headers:
806 for header in headers.headers: print string.strip(header)
807 print
808 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000809
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000810
Guido van Rossum23acc951994-02-21 16:36:04 +0000811if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +0000812 test()