blob: 6c3b5e0402991ce630f16e4265e1a3985e6c02d0 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
import errno
import mimetools
import socket
import sys

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000076
# Names exported by "from <module> import *".  Besides the 1.5-compatible
# HTTP class, the public HTTP/1.1 classes defined in this module are listed
# so that star-imports can reach them as well.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection"]

# default TCP ports for the two schemes
HTTP_PORT = 80
HTTPS_PORT = 443

# initial value of the HTTPResponse attributes that begin() fills in
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
88
89
class HTTPResponse:
    """A single HTTP response read from a socket.

    begin() parses the status line and the headers; read() then returns
    the body, honouring a "chunked" Transfer-Encoding or a Content-Length.
    Until begin() has run, the status and framing attributes hold _UNKNOWN
    and self.msg is None.
    """

    def __init__(self, sock, debuglevel=0):
        # bufsize 0 asks for an unbuffered file object on the socket
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel

        # header object; stays None until begin() has been called
        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN         # HTTP-Version
        self.status = _UNKNOWN          # Status-Code
        self.reason = _UNKNOWN          # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def begin(self):
        """Read the status line and headers and work out the body framing.

        Does nothing if the headers have already been read.  Raises
        BadStatusLine when the first line does not start with 'HTTP/',
        UnknownProtocol for an HTTP version that is not 0.9 or 1.x, and
        UnknownTransferEncoding for a Transfer-Encoding other than
        "chunked".
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        line = self.fp.readline()
        if self.debuglevel > 0:
            sys.stdout.write("reply: %s\n" % repr(line))
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                # no Reason-Phrase on the line
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # fewer than two fields: treat it as an HTTP/0.9 response
                version = "HTTP/0.9"
                status = "200"
                reason = ""
        if version[:5] != 'HTTP/':
            self.close()
            raise BadStatusLine(line)

        self.status = status = int(status)
        self.reason = reason.strip()

        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # no headers to parse; install an empty header object
            self.msg = mimetools.Message(StringIO())
            return

        self.msg = mimetools.Message(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                # no newline is appended after the header text
                sys.stdout.write("header: %s" % hdr)

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc:
            if tr_enc.lower() != 'chunked':
                raise UnknownTransferEncoding()
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        #
        # for HTTP/1.1 the connection stays open by default; for anything
        # else it stays open IFF we see a Keep-Alive header.  a
        # "Connection: close" always closes the connection.
        closes_by_default = self.version != 11 and \
                            not self.msg.getheader('keep-alive')
        conn = self.msg.getheader('connection')
        if conn:
            conn = conn.lower()
            self.will_close = conn.find('close') != -1 or closes_by_default
        else:
            self.will_close = closes_by_default

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == 204 or            # No Content
            status == 304 or            # Not Modified
            100 <= status < 200):       # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def close(self):
        """Close the underlying file object, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return true once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Return up to `amt' bytes of the body, or all of it if amt is None.

        Returns '' once the response has been closed.  The response closes
        itself as soon as the whole body has been consumed.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        if self.length is not None:
            # account for what actually arrived (a read may come up short)
            # and release the file once the body is exhausted, so that
            # isclosed() becomes true
            self.length = self.length - len(s)
            if not self.length:
                self.close()

        return s

    def _read_chunked(self, amt):
        """Read from a chunked body; see read() for the meaning of `amt'."""
        chunk_left = self.chunk_left
        pieces = []
        while 1:
            if chunk_left is None:
                # at a chunk boundary: parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                # remember how much of this chunk is still unread
                self.chunk_left = chunk_left - amt
                return ''.join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt = amt - chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while 1:
            line = self.fp.readline()
            # an empty string means EOF: stop instead of looping forever
            if not line or line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(pieces)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        pieces = []
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                # hand over whatever was read before the data ran out
                raise IncompleteRead(''.join(pieces))
            pieces.append(chunk)
            amt = amt - len(chunk)
        return ''.join(pieces)

    def getheader(self, name, default=None):
        """Return header `name', or `default' if the header is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000314
315
class HTTPConnection:
    """Client side of an HTTP/1.1 connection to one host.

    A request is issued with putrequest(), putheader()* and endheaders()
    (or request(), which does all of that), after which getresponse()
    returns a `response_class' instance.  The legal order of these calls
    is enforced through the private state and response attributes.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1       # connect automatically on the first send()
    debuglevel = 0

    def __init__(self, host, port=None):
        self.sock = None
        self.__response = None
        self.__state = _CS_IDLE

        self._set_hostport(host, port)

    def _set_hostport(self, host, port):
        """Store host and port; a missing port is taken from "host:port"
        or, failing that, from default_port."""
        if port is None:
            i = host.find(':')
            if i >= 0:
                port = int(host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug level; values above zero print protocol traffic."""
        self.debuglevel = level

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0:
            print("connect: (%s, %s)" % (self.host, self.port))
        self.sock.connect((self.host, self.port))

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def _is_broken_pipe(self, err):
        """Return true if socket error `err' reports a broken pipe."""
        return err.args and err.args[0] == errno.EPIPE

    def send(self, str):
        """Send `str' to the server."""
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: %s" % repr(str))
        data = str
        try:
            # send() may accept only part of the data; keep going until
            # everything has been handed over.  a socket-like object whose
            # send() returns None is assumed to have taken it all.
            while 1:
                sent = self.sock.send(data)
                if sent is None or sent >= len(data):
                    break
                data = data[sent:]
        except socket.error as v:
            if self._is_broken_pipe(v):
                self.close()
            raise

    def putrequest(self, method, url):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.

        Raises CannotSendRequest unless the connection is idle.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        if not url:
            url = '/'
        request_line = '%s %s %s\r\n' % (method, url, self._http_vsn_str)

        try:
            self.send(request_line)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self._is_broken_pipe(v) or not self.auto_open:
                raise
            # try one more time (the socket was closed; this will reopen)
            self.send(request_line)
            # the failed send() called close(), which reset the state to
            # idle; the request line is out now, so headers may follow
            self.__state = _CS_REQ_STARTED

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            # this header is issued *only* for HTTP/1.1 connections. more
            # specifically, this means it is only issued when the client uses
            # the new HTTPConnection() class. backwards-compat clients will
            # be using HTTP/1.0 and those clients may be issuing this header
            # themselves. we should NOT issue it twice; some web servers (such
            # as Apache) barf when they see two Host: headers

            # if we need a non-standard port, include it in the header.
            # "standard" is relative to this class (subclasses override
            # default_port), not always port 80.
            if self.port == self.default_port:
                self.putheader('Host', self.host)
            else:
                self.putheader('Host', "%s:%s" % (self.host, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        self.send('%s: %s\r\n' % (header, value))

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""

        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT

        self.send('\r\n')

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server."""
        # `headers' is only read, never modified, so the shared default
        # dictionary is harmless here.

        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self._is_broken_pipe(v) or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Send request line, headers and (if non-empty) body in one go."""
        self.putrequest(method, url)

        if body:
            self.putheader('Content-Length', str(len(body)))
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        "Get the response from the server."

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        # debuglevel is only passed along when set, so response classes
        # that take just the socket keep working
        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel)
        else:
            response = self.response_class(self.sock)

        response.begin()
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000559
560
class FakeSocket:
    """Socket look-alike that routes reads and writes through an SSL object.

    Anything this class does not define itself is looked up on the wrapped
    socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        This method offers only partial support for the makefile
        interface of a real socket.  It only supports modes 'r' and
        'rb' and the bufsize argument is ignored.

        The returned object contains *all* of the file data
        """
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()

        # drain the SSL object until it reports an error or returns ''
        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                break
            if data == '':
                break
            pieces.append(data)
        return StringIO("".join(pieces))

    def send(self, stuff, flags = 0):
        # flags is accepted for socket compatibility and not used
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        # flags is accepted for socket compatibility and not used
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # everything else comes from the wrapped socket
        return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000597
Guido van Rossum23acc951994-02-21 16:36:04 +0000598
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        # 'key_file' and 'cert_file' are the only keyword arguments allowed
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # socket.ssl() is handed the socket's _sock attribute when there
        # is one, and the socket itself otherwise
        realsock = getattr(plain, "_sock", plain)
        ssl = socket.ssl(realsock, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +0000630
631
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000632class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000633 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000634
Greg Steindd6eefb2000-07-18 09:09:48 +0000635 _http_vsn = 10
636 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000637
Greg Steindd6eefb2000-07-18 09:09:48 +0000638 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000639
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000640 _connection_class = HTTPConnection
641
Greg Steindd6eefb2000-07-18 09:09:48 +0000642 def __init__(self, host='', port=None, **x509):
643 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000644
Greg Steindd6eefb2000-07-18 09:09:48 +0000645 # some joker passed 0 explicitly, meaning default port
646 if port == 0:
647 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000648
Greg Steindd6eefb2000-07-18 09:09:48 +0000649 # Note that we may pass an empty string as the host; this will throw
650 # an error when we attempt to connect. Presumably, the client code
651 # will call connect before then, with a proper host.
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000652 self._conn = self._connection_class(host, port)
653 # set up delegation to flesh out interface
654 self.send = self._conn.send
655 self.putrequest = self._conn.putrequest
656 self.endheaders = self._conn.endheaders
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000657 self._conn._http_vsn = self._http_vsn
658 self._conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000659
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 # we never actually use these for anything, but we keep them here for
661 # compatibility with post-1.5.2 CVS.
662 self.key_file = x509.get('key_file')
663 self.cert_file = x509.get('cert_file')
Greg Stein5e0fa402000-06-26 08:28:01 +0000664
Greg Steindd6eefb2000-07-18 09:09:48 +0000665 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
Greg Steindd6eefb2000-07-18 09:09:48 +0000667 def connect(self, host=None, port=None):
668 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000669
Greg Steindd6eefb2000-07-18 09:09:48 +0000670 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000671 self._conn._set_hostport(host, port)
672 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 def set_debuglevel(self, debuglevel):
Jeremy Hylton30f86742000-09-18 22:50:38 +0000675 self._conn.set_debuglevel(debuglevel)
Greg Stein5e0fa402000-06-26 08:28:01 +0000676
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 def getfile(self):
678 "Provide a getfile, since the superclass' does not use this concept."
679 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000680
Greg Steindd6eefb2000-07-18 09:09:48 +0000681 def putheader(self, header, *values):
682 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000683 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000684
Greg Steindd6eefb2000-07-18 09:09:48 +0000685 def getreply(self):
686 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000687
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 Returns a tuple consisting of:
689 - server status code (e.g. '200' if all goes well)
690 - server "reason" corresponding to status code
691 - any RFC822 headers in the response from the server
692 """
693 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000694 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000695 except BadStatusLine, e:
696 ### hmm. if getresponse() ever closes the socket on a bad request,
697 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000698
Greg Steindd6eefb2000-07-18 09:09:48 +0000699 ### should we keep this behavior? do people use it?
700 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000701 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000702
Greg Steindd6eefb2000-07-18 09:09:48 +0000703 # close our socket -- we want to restart after any protocol error
704 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000705
Greg Steindd6eefb2000-07-18 09:09:48 +0000706 self.headers = None
707 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000708
Greg Steindd6eefb2000-07-18 09:09:48 +0000709 self.headers = response.msg
710 self.file = response.fp
711 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000712
def close(self):
    """Close the wrapped connection and drop the reference in self.file."""
    conn = self._conn
    conn.close()

    # self.file normally is response.fp, and closing that is the
    # superclass' job; all that is needed here is to let go of it.
    ### messy: after a status of -1 the file is ours.  it is not closed
    ### explicitly, but dropping this reference will take care of that.
    self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000722
# HTTPS is only defined when the socket module has an 'ssl' attribute.
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.

        Everything is inherited from HTTP; only the connection class
        differs.
        """

        # NOTE(review): presumably HTTP creates its wrapped connection from
        # this attribute -- the code that uses it is not part of this
        # excerpt, confirm there.
        _connection_class = HTTPSConnection
Tim Peters5ceadc82001-01-13 19:16:21 +0000733
Greg Stein5e0fa402000-06-26 08:28:01 +0000734
class HTTPException(Exception):
    """Common base class of the exceptions defined in this module."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000737
class NotConnected(HTTPException):
    """HTTPException subclass; going by its name, it signals an operation
    on a connection that is not connected (raise sites are not shown in
    this part of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000740
class UnknownProtocol(HTTPException):
    """HTTPException subclass that carries a protocol version.

    The constructor argument is available as the ``version`` attribute and,
    like for any other exception, through ``args`` and ``str()``.
    """

    def __init__(self, version):
        # Let the base class record the argument; without this call the
        # exception would carry no args and print as an empty message.
        HTTPException.__init__(self, version)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +0000744
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; going by its name, it signals a transfer
    encoding that is not recognized (raise sites are not shown in this
    part of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000747
class IllegalKeywordArgument(HTTPException):
    """HTTPException subclass; going by its name, it signals a keyword
    argument that is not accepted (raise sites are not shown in this part
    of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000750
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; going by its name, it signals a file mode
    that is not implemented (raise sites are not shown in this part of
    the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000753
class IncompleteRead(HTTPException):
    """HTTPException subclass that carries the data of a partial read.

    The constructor argument is available as the ``partial`` attribute
    and, like for any other exception, through ``args``.
    """

    def __init__(self, partial):
        # Let the base class record the argument; without this call the
        # exception would carry no args and print as an empty message.
        HTTPException.__init__(self, partial)
        self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +0000757
class ImproperConnectionState(HTTPException):
    """Base class of CannotSendRequest, CannotSendHeader and
    ResponseNotReady.

    NOTE(review): presumably these report violations of the connection
    state rules described in the module docstring -- confirm at the raise
    sites, which are not part of this excerpt.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000760
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; going by its name, it signals
    that a request cannot be sent in the current state (raise sites are
    not shown in this part of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000763
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass; going by its name, it signals
    that a header cannot be sent in the current state (raise sites are
    not shown in this part of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000766
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass; going by its name, it signals
    that a response cannot be fetched yet (raise sites are not shown in
    this part of the file)."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000769
class BadStatusLine(HTTPException):
    """HTTPException subclass that carries an unusable status line.

    The constructor argument is available as the ``line`` attribute and,
    like for any other exception, through ``args`` and ``str()``.
    """

    def __init__(self, line):
        # Let the base class record the argument; without this call the
        # exception would carry no args and print as an empty message.
        HTTPException.__init__(self, line)
        self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +0000773
# for backwards compatibility: `error` is a second name for HTTPException,
# so code that refers to this module's `error` gets that same class.
error = HTTPException
776
777
778#
779# snarfed from httplib.py for now...
780#
Guido van Rossum23acc951994-02-21 16:36:04 +0000781def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000782 """Test this module.
783
784 The test consists of retrieving and displaying the Python
785 home page, along with the error code and error string returned
786 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000787 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000788
Guido van Rossum41999c11997-12-09 00:12:23 +0000789 import sys
790 import getopt
791 opts, args = getopt.getopt(sys.argv[1:], 'd')
792 dl = 0
793 for o, a in opts:
794 if o == '-d': dl = dl + 1
795 host = 'www.python.org'
796 selector = '/'
797 if args[0:]: host = args[0]
798 if args[1:]: selector = args[1]
799 h = HTTP()
800 h.set_debuglevel(dl)
801 h.connect(host)
802 h.putrequest('GET', selector)
803 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000804 status, reason, headers = h.getreply()
805 print 'status =', status
806 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000807 print
808 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000809 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +0000810 print
811 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000812
813 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000814 host = 'sourceforge.net'
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000815 selector = '/projects/python'
Greg Steindd6eefb2000-07-18 09:09:48 +0000816 hs = HTTPS()
817 hs.connect(host)
818 hs.putrequest('GET', selector)
819 hs.endheaders()
820 status, reason, headers = hs.getreply()
821 print 'status =', status
822 print 'reason =', reason
823 print
824 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000825 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000826 print
827 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000828
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000829
# Run test() when this file is executed directly rather than imported.
if __name__ == '__main__':
    test()