blob: 0fd6ab4449da93af401a7e957bcd5887a0e81614 [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Guido van Rossum23acc951994-02-21 16:36:04 +000072
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000073try:
Greg Steindd6eefb2000-07-18 09:09:48 +000074 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000075except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000076 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000077
# Names made available by ``from httplib import *``.
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPSConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "IllegalKeywordArgument",
    "UnimplementedFileMode",
    "IncompleteRead",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
]
Skip Montanaro2dd42762001-01-23 15:35:05 +000084
# Default TCP ports for plain HTTP and for HTTP over SSL.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes that have not been filled
# in yet.
_UNKNOWN = 'UNKNOWN'

# connection states (values of HTTPConnection's private state attribute)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
94
95
96class HTTPResponse:
Jeremy Hylton30f86742000-09-18 22:50:38 +000097 def __init__(self, sock, debuglevel=0):
Greg Steindd6eefb2000-07-18 09:09:48 +000098 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +000099 self.debuglevel = debuglevel
Greg Stein5e0fa402000-06-26 08:28:01 +0000100
Greg Steindd6eefb2000-07-18 09:09:48 +0000101 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000102
Greg Steindd6eefb2000-07-18 09:09:48 +0000103 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000104 self.version = _UNKNOWN # HTTP-Version
105 self.status = _UNKNOWN # Status-Code
106 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000107
Tim Peters07e99cb2001-01-14 23:47:14 +0000108 self.chunked = _UNKNOWN # is "chunked" being used?
109 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
110 self.length = _UNKNOWN # number of bytes left in response
111 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000112
Greg Steindd6eefb2000-07-18 09:09:48 +0000113 def begin(self):
114 if self.msg is not None:
115 # we've already started reading the response
116 return
Greg Stein5e0fa402000-06-26 08:28:01 +0000117
Greg Stein5e0fa402000-06-26 08:28:01 +0000118 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000119 if self.debuglevel > 0:
120 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000121 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000122 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000123 except ValueError:
124 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000125 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000126 reason = ""
127 except ValueError:
Jeremy Hylton110941a2000-10-12 19:58:36 +0000128 version = "HTTP/0.9"
129 status = "200"
130 reason = ""
Greg Steindd6eefb2000-07-18 09:09:48 +0000131 if version[:5] != 'HTTP/':
132 self.close()
133 raise BadStatusLine(line)
Greg Stein5e0fa402000-06-26 08:28:01 +0000134
Jeremy Hylton23d40472001-04-13 14:57:08 +0000135 # The status code is a three-digit number
136 try:
137 self.status = status = int(status)
138 if status < 100 or status > 999:
139 raise BadStatusLine(line)
140 except ValueError:
141 raise BadStatusLine(line)
Guido van Rossum34735a62000-12-15 15:09:42 +0000142 self.reason = reason.strip()
Greg Stein5e0fa402000-06-26 08:28:01 +0000143
Greg Steindd6eefb2000-07-18 09:09:48 +0000144 if version == 'HTTP/1.0':
145 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000146 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000147 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000148 elif version == 'HTTP/0.9':
149 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000150 else:
151 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000152
Jeremy Hylton110941a2000-10-12 19:58:36 +0000153 if self.version == 9:
154 self.msg = mimetools.Message(StringIO())
155 return
156
Greg Steindd6eefb2000-07-18 09:09:48 +0000157 self.msg = mimetools.Message(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000158 if self.debuglevel > 0:
159 for hdr in self.msg.headers:
160 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000161
Greg Steindd6eefb2000-07-18 09:09:48 +0000162 # don't let the msg keep an fp
163 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000164
Greg Steindd6eefb2000-07-18 09:09:48 +0000165 # are we using the chunked-style of transfer encoding?
166 tr_enc = self.msg.getheader('transfer-encoding')
167 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000168 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000169 raise UnknownTransferEncoding()
170 self.chunked = 1
171 self.chunk_left = None
172 else:
173 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000174
Greg Steindd6eefb2000-07-18 09:09:48 +0000175 # will the connection close at the end of the response?
176 conn = self.msg.getheader('connection')
177 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000178 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000179 # a "Connection: close" will always close the connection. if we
180 # don't see that and this is not HTTP/1.1, then the connection will
181 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000182 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000183 ( self.version != 11 and \
184 not self.msg.getheader('keep-alive') )
185 else:
186 # for HTTP/1.1, the connection will always remain open
187 # otherwise, it will remain open IFF we see a Keep-Alive header
188 self.will_close = self.version != 11 and \
189 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000190
Greg Steindd6eefb2000-07-18 09:09:48 +0000191 # do we have a Content-Length?
192 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
193 length = self.msg.getheader('content-length')
194 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000195 try:
196 self.length = int(length)
197 except ValueError:
198 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000199 else:
200 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000201
Greg Steindd6eefb2000-07-18 09:09:48 +0000202 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000203 if (status == 204 or # No Content
204 status == 304 or # Not Modified
205 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000206 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000207
Greg Steindd6eefb2000-07-18 09:09:48 +0000208 # if the connection remains open, and we aren't using chunked, and
209 # a content-length was not provided, then assume that the connection
210 # WILL close.
211 if not self.will_close and \
212 not self.chunked and \
213 self.length is None:
214 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000215
Greg Steindd6eefb2000-07-18 09:09:48 +0000216 def close(self):
217 if self.fp:
218 self.fp.close()
219 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000220
Greg Steindd6eefb2000-07-18 09:09:48 +0000221 def isclosed(self):
222 # NOTE: it is possible that we will not ever call self.close(). This
223 # case occurs when will_close is TRUE, length is None, and we
224 # read up to the last byte, but NOT past it.
225 #
226 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
227 # called, meaning self.isclosed() is meaningful.
228 return self.fp is None
229
230 def read(self, amt=None):
231 if self.fp is None:
232 return ''
233
234 if self.chunked:
235 chunk_left = self.chunk_left
236 value = ''
237 while 1:
238 if chunk_left is None:
239 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000240 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000241 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000242 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000243 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000244 if chunk_left == 0:
245 break
246 if amt is None:
247 value = value + self._safe_read(chunk_left)
248 elif amt < chunk_left:
249 value = value + self._safe_read(amt)
250 self.chunk_left = chunk_left - amt
251 return value
252 elif amt == chunk_left:
253 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000254 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000255 self.chunk_left = None
256 return value
257 else:
258 value = value + self._safe_read(chunk_left)
259 amt = amt - chunk_left
260
261 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000262 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000263 chunk_left = None
264
265 # read and discard trailer up to the CRLF terminator
266 ### note: we shouldn't have any trailers!
267 while 1:
268 line = self.fp.readline()
269 if line == '\r\n':
270 break
271
272 # we read everything; close the "file"
273 self.close()
274
275 return value
276
277 elif amt is None:
278 # unbounded read
279 if self.will_close:
280 s = self.fp.read()
281 else:
282 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000283 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000284 return s
285
286 if self.length is not None:
287 if amt > self.length:
288 # clip the read to the "end of response"
289 amt = self.length
290 self.length = self.length - amt
291
292 # we do not use _safe_read() here because this may be a .will_close
293 # connection, and the user is reading more bytes than will be provided
294 # (for example, reading in 1k chunks)
295 s = self.fp.read(amt)
296
Greg Steindd6eefb2000-07-18 09:09:48 +0000297 return s
298
299 def _safe_read(self, amt):
300 """Read the number of bytes requested, compensating for partial reads.
301
302 Normally, we have a blocking socket, but a read() can be interrupted
303 by a signal (resulting in a partial read).
304
305 Note that we cannot distinguish between EOF and an interrupt when zero
306 bytes have been read. IncompleteRead() will be raised in this
307 situation.
308
309 This function should be used when <amt> bytes "should" be present for
310 reading. If the bytes are truly not available (due to EOF), then the
311 IncompleteRead exception can be used to detect the problem.
312 """
313 s = ''
314 while amt > 0:
315 chunk = self.fp.read(amt)
316 if not chunk:
317 raise IncompleteRead(s)
318 s = s + chunk
319 amt = amt - len(chunk)
320 return s
321
322 def getheader(self, name, default=None):
323 if self.msg is None:
324 raise ResponseNotReady()
325 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000326
327
328class HTTPConnection:
329
Greg Steindd6eefb2000-07-18 09:09:48 +0000330 _http_vsn = 11
331 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000332
Greg Steindd6eefb2000-07-18 09:09:48 +0000333 response_class = HTTPResponse
334 default_port = HTTP_PORT
335 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000336 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000337
Greg Steindd6eefb2000-07-18 09:09:48 +0000338 def __init__(self, host, port=None):
339 self.sock = None
340 self.__response = None
341 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000342
Greg Steindd6eefb2000-07-18 09:09:48 +0000343 self._set_hostport(host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000344
Greg Steindd6eefb2000-07-18 09:09:48 +0000345 def _set_hostport(self, host, port):
346 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000347 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000348 if i >= 0:
Guido van Rossumfd97a912001-01-15 14:34:20 +0000349 port = int(host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000350 host = host[:i]
351 else:
352 port = self.default_port
353 self.host = host
354 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000355
Jeremy Hylton30f86742000-09-18 22:50:38 +0000356 def set_debuglevel(self, level):
357 self.debuglevel = level
358
Greg Steindd6eefb2000-07-18 09:09:48 +0000359 def connect(self):
360 """Connect to the host and port specified in __init__."""
Martin v. Löwis2ad25692001-07-31 08:40:21 +0000361 msg = "getaddrinfo returns an empty list"
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000362 for res in socket.getaddrinfo(self.host, self.port, 0, socket.SOCK_STREAM):
363 af, socktype, proto, canonname, sa = res
364 try:
365 self.sock = socket.socket(af, socktype, proto)
366 if self.debuglevel > 0:
367 print "connect: (%s, %s)" % (self.host, self.port)
368 self.sock.connect(sa)
369 except socket.error, msg:
370 if self.debuglevel > 0:
371 print 'connect fail:', (self.host, self.port)
Martin v. Löwis322c0d12001-10-07 08:53:32 +0000372 if self.sock:
373 self.sock.close()
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000374 self.sock = None
375 continue
376 break
377 if not self.sock:
378 raise socket.error, msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000379
Greg Steindd6eefb2000-07-18 09:09:48 +0000380 def close(self):
381 """Close the connection to the HTTP server."""
382 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000383 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000384 self.sock = None
385 if self.__response:
386 self.__response.close()
387 self.__response = None
388 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000389
Greg Steindd6eefb2000-07-18 09:09:48 +0000390 def send(self, str):
391 """Send `str' to the server."""
392 if self.sock is None:
393 if self.auto_open:
394 self.connect()
395 else:
396 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000397
Greg Steindd6eefb2000-07-18 09:09:48 +0000398 # send the data to the server. if we get a broken pipe, then close
399 # the socket. we want to reconnect when somebody tries to send again.
400 #
401 # NOTE: we DO propagate the error, though, because we cannot simply
402 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000403 if self.debuglevel > 0:
404 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000405 try:
406 self.sock.send(str)
407 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000408 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000409 self.close()
410 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000411
Greg Steindd6eefb2000-07-18 09:09:48 +0000412 def putrequest(self, method, url):
413 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000414
Greg Steindd6eefb2000-07-18 09:09:48 +0000415 `method' specifies an HTTP request method, e.g. 'GET'.
416 `url' specifies the object being requested, e.g. '/index.html'.
417 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000418
Greg Steindd6eefb2000-07-18 09:09:48 +0000419 # check if a prior response has been completed
420 if self.__response and self.__response.isclosed():
421 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000422
Greg Steindd6eefb2000-07-18 09:09:48 +0000423 #
424 # in certain cases, we cannot issue another request on this connection.
425 # this occurs when:
426 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
427 # 2) a response to a previous request has signalled that it is going
428 # to close the connection upon completion.
429 # 3) the headers for the previous response have not been read, thus
430 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
431 #
432 # if there is no prior response, then we can request at will.
433 #
434 # if point (2) is true, then we will have passed the socket to the
435 # response (effectively meaning, "there is no prior response"), and
436 # will open a new one when a new request is made.
437 #
438 # Note: if a prior response exists, then we *can* start a new request.
439 # We are not allowed to begin fetching the response to this new
440 # request, however, until that prior response is complete.
441 #
442 if self.__state == _CS_IDLE:
443 self.__state = _CS_REQ_STARTED
444 else:
445 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000446
Greg Steindd6eefb2000-07-18 09:09:48 +0000447 if not url:
448 url = '/'
449 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000450
Greg Steindd6eefb2000-07-18 09:09:48 +0000451 try:
452 self.send(str)
453 except socket.error, v:
454 # trap 'Broken pipe' if we're allowed to automatically reconnect
455 if v[0] != 32 or not self.auto_open:
456 raise
457 # try one more time (the socket was closed; this will reopen)
458 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000459
Greg Steindd6eefb2000-07-18 09:09:48 +0000460 if self._http_vsn == 11:
461 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000462
Greg Steindd6eefb2000-07-18 09:09:48 +0000463 # this header is issued *only* for HTTP/1.1 connections. more
464 # specifically, this means it is only issued when the client uses
465 # the new HTTPConnection() class. backwards-compat clients will
466 # be using HTTP/1.0 and those clients may be issuing this header
467 # themselves. we should NOT issue it twice; some web servers (such
468 # as Apache) barf when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000469
470 # if we need a non-standard port,include it in the header
471 if self.port == HTTP_PORT:
472 self.putheader('Host', self.host)
473 else:
474 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000475
Greg Steindd6eefb2000-07-18 09:09:48 +0000476 # note: we are assuming that clients will not attempt to set these
477 # headers since *this* library must deal with the
478 # consequences. this also means that when the supporting
479 # libraries are updated to recognize other forms, then this
480 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000481
Greg Steindd6eefb2000-07-18 09:09:48 +0000482 # we only want a Content-Encoding of "identity" since we don't
483 # support encodings such as x-gzip or x-deflate.
484 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000485
Greg Steindd6eefb2000-07-18 09:09:48 +0000486 # we can accept "chunked" Transfer-Encodings, but no others
487 # NOTE: no TE header implies *only* "chunked"
488 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000489
Greg Steindd6eefb2000-07-18 09:09:48 +0000490 # if TE is supplied in the header, then it must appear in a
491 # Connection header.
492 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000493
Greg Steindd6eefb2000-07-18 09:09:48 +0000494 else:
495 # For HTTP/1.0, the server will assume "not chunked"
496 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000497
Greg Steindd6eefb2000-07-18 09:09:48 +0000498 def putheader(self, header, value):
499 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000500
Greg Steindd6eefb2000-07-18 09:09:48 +0000501 For example: h.putheader('Accept', 'text/html')
502 """
503 if self.__state != _CS_REQ_STARTED:
504 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000505
Greg Steindd6eefb2000-07-18 09:09:48 +0000506 str = '%s: %s\r\n' % (header, value)
507 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000508
Greg Steindd6eefb2000-07-18 09:09:48 +0000509 def endheaders(self):
510 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000511
Greg Steindd6eefb2000-07-18 09:09:48 +0000512 if self.__state == _CS_REQ_STARTED:
513 self.__state = _CS_REQ_SENT
514 else:
515 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000516
Greg Steindd6eefb2000-07-18 09:09:48 +0000517 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000518
Greg Steindd6eefb2000-07-18 09:09:48 +0000519 def request(self, method, url, body=None, headers={}):
520 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000521
Greg Steindd6eefb2000-07-18 09:09:48 +0000522 try:
523 self._send_request(method, url, body, headers)
524 except socket.error, v:
525 # trap 'Broken pipe' if we're allowed to automatically reconnect
526 if v[0] != 32 or not self.auto_open:
527 raise
528 # try one more time
529 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000530
Greg Steindd6eefb2000-07-18 09:09:48 +0000531 def _send_request(self, method, url, body, headers):
532 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000533
Greg Steindd6eefb2000-07-18 09:09:48 +0000534 if body:
535 self.putheader('Content-Length', str(len(body)))
536 for hdr, value in headers.items():
537 self.putheader(hdr, value)
538 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000539
Greg Steindd6eefb2000-07-18 09:09:48 +0000540 if body:
541 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000542
Greg Steindd6eefb2000-07-18 09:09:48 +0000543 def getresponse(self):
544 "Get the response from the server."
Greg Stein5e0fa402000-06-26 08:28:01 +0000545
Greg Steindd6eefb2000-07-18 09:09:48 +0000546 # check if a prior response has been completed
547 if self.__response and self.__response.isclosed():
548 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000549
Greg Steindd6eefb2000-07-18 09:09:48 +0000550 #
551 # if a prior response exists, then it must be completed (otherwise, we
552 # cannot read this response's header to determine the connection-close
553 # behavior)
554 #
555 # note: if a prior response existed, but was connection-close, then the
556 # socket and response were made independent of this HTTPConnection
557 # object since a new request requires that we open a whole new
558 # connection
559 #
560 # this means the prior response had one of two states:
561 # 1) will_close: this connection was reset and the prior socket and
562 # response operate independently
563 # 2) persistent: the response was retained and we await its
564 # isclosed() status to become true.
565 #
566 if self.__state != _CS_REQ_SENT or self.__response:
567 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000568
Jeremy Hylton30f86742000-09-18 22:50:38 +0000569 if self.debuglevel > 0:
570 response = self.response_class(self.sock, self.debuglevel)
571 else:
572 response = self.response_class(self.sock)
Greg Stein5e0fa402000-06-26 08:28:01 +0000573
Greg Steindd6eefb2000-07-18 09:09:48 +0000574 response.begin()
575 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000576
Greg Steindd6eefb2000-07-18 09:09:48 +0000577 if response.will_close:
578 # this effectively passes the connection to the response
579 self.close()
580 else:
581 # remember this, so we can tell when it is complete
582 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000583
Greg Steindd6eefb2000-07-18 09:09:48 +0000584 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000585
586
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000587class FakeSocket:
Greg Steindd6eefb2000-07-18 09:09:48 +0000588 def __init__(self, sock, ssl):
589 self.__sock = sock
590 self.__ssl = ssl
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000591
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000592 def makefile(self, mode, bufsize=None):
593 """Return a readable file-like object with data from socket.
594
595 This method offers only partial support for the makefile
596 interface of a real socket. It only supports modes 'r' and
597 'rb' and the bufsize argument is ignored.
598
Tim Peters5ceadc82001-01-13 19:16:21 +0000599 The returned object contains *all* of the file data
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000600 """
Greg Steindd6eefb2000-07-18 09:09:48 +0000601 if mode != 'r' and mode != 'rb':
602 raise UnimplementedFileMode()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000603
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000604 msgbuf = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000605 while 1:
606 try:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000607 buf = self.__ssl.read()
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000608 except socket.sslerror, err:
609 if (err[0] == socket.SSL_ERROR_WANT_READ
610 or err[0] == socket.SSL_ERROR_WANT_WRITE
611 or 0):
612 continue
613 if err[0] == socket.SSL_ERROR_ZERO_RETURN:
614 break
615 raise
616 except socket.error, err:
Tim Petersf3623f32001-10-11 18:15:51 +0000617 if err[0] == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000618 continue
619 raise
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000620 if buf == '':
621 break
622 msgbuf.append(buf)
623 return StringIO("".join(msgbuf))
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000624
Greg Steindd6eefb2000-07-18 09:09:48 +0000625 def send(self, stuff, flags = 0):
626 return self.__ssl.write(stuff)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000627
Greg Steindd6eefb2000-07-18 09:09:48 +0000628 def recv(self, len = 1024, flags = 0):
629 return self.__ssl.read(len)
Guido van Rossum23acc951994-02-21 16:36:04 +0000630
Greg Steindd6eefb2000-07-18 09:09:48 +0000631 def __getattr__(self, attr):
632 return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000633
Guido van Rossum23acc951994-02-21 16:36:04 +0000634
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Set up the connection, accepting key_file and cert_file keywords.

        Any other keyword argument raises IllegalKeywordArgument.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # hand socket.ssl() the inner `_sock` object when the socket has one
        raw = getattr(plain, "_sock", plain)
        ssl = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
667
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    This class does not inherit from the connection class; it wraps an
    instance of ``_connection_class`` (kept in ``self._conn``) and exposes
    the 1.5-style interface by delegating to it.  _setup() switches the
    wrapped connection to this class's HTTP version (HTTP/1.0).
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    # class instantiated by __init__ to obtain the wrapped connection
    _connection_class = HTTPConnection

    def __init__(self, host='', port=None):
        """Create the wrapped connection.

        host defaults to '' so that HTTP() can be built without arguments
        and given a real host later through connect().  A port of 0 is
        treated the same as None.
        """

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port))

    def _setup(self, conn):
        """Adopt conn as the wrapped connection and wire up delegation."""
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection use this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        """Connect, optionally setting the host/port first.

        When host is None the host/port given at construction time are
        left untouched; port is only looked at when host is given.
        """

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        """Return the file object recorded by the last getreply().

        This is None before any getreply() call and after close().
        """
        return self.file

    def putheader(self, header, *values):
        """Send a header; several values are sent as continuation lines.

        The wrapped connection's putheader() is called with one value
        argument, so the values are joined with '\\r\\n\\t' first.
        """
        self._conn.putheader(header, '\r\n\t'.join(values))

    def getreply(self):
        """Fetch the reply to the request that was just sent.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the wrapped connection raises BadStatusLine, the tuple is
        (-1, <the offending line>, None); self.headers is set to None and
        self.file is set to a file made from the connection's socket, so
        getfile() can still be used to look at whatever the server sent.
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine:
            # Fetch the exception through sys.exc_info() so that no
            # version-specific "except ..., name" syntax is needed.
            import sys
            bad = sys.exc_info()[1]

            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it.  Hold it in
            # a local for now: close() below resets self.file to None, so
            # assigning it to self.file first would throw it away again.
            leftover = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.file = leftover
            self.headers = None
            return -1, bad.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and forget the reply file."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # wrapped connection. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000756
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.  This subclass only swaps the connection class and lets
        the X509 keyword arguments through to it.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            """Create the wrapped HTTPS connection.

            host defaults to '' (a real one can be given to connect()
            later); a port of 0 is treated as None.  Any extra keyword
            arguments are handed unchanged to the connection class.
            """
            # urf. compensate for bad input.
            if port == 0:
                port = None

            secure_conn = self._connection_class(host, port, **x509)
            self._setup(secure_conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = x509.get('key_file')
            self.cert_file = x509.get('cert_file')
780
Greg Stein5e0fa402000-06-26 08:28:01 +0000781
class HTTPException(Exception):
    """Root of this module's exception hierarchy."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000784
class NotConnected(HTTPException):
    """HTTPException subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports use of a connection that has not been connected; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000787
class UnknownProtocol(HTTPException):
    """HTTPException subclass carrying a protocol version.

    The value handed to the constructor is available as the ``version``
    attribute.  It is also stored in ``args`` so that str() of the
    exception and tracebacks show it instead of an empty message.
    """

    def __init__(self, version):
        self.args = (version,)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +0000791
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports a transfer encoding the module does not handle; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000794
class IllegalKeywordArgument(HTTPException):
    """HTTPException subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports an unsupported keyword argument; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000797
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports a file mode that is not implemented; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000800
class IncompleteRead(HTTPException):
    """HTTPException subclass carrying partially read data.

    The value handed to the constructor is available as the ``partial``
    attribute.  It is also stored in ``args`` so that str() of the
    exception and tracebacks show it instead of an empty message.
    """

    def __init__(self, partial):
        self.args = (partial,)
        self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +0000804
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady; adds no behaviour of its own.

    Catching this class catches all three of those subclasses.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000807
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports that a new request may not be started yet; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000810
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports that a header may not be sent in the current state; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000813
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behaviour of its own.

    NOTE(review): the raise sites are outside this section -- by its name
    it reports that no response can be fetched yet; confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000816
class BadStatusLine(HTTPException):
    """HTTPException subclass carrying a status line.

    The value handed to the constructor is available as the ``line``
    attribute (HTTP.getreply() returns it as the "reason").  It is also
    stored in ``args`` so that str() of the exception and tracebacks show
    it instead of an empty message.
    """

    def __init__(self, line):
        self.args = (line,)
        self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +0000820
# for backwards compatibility: ``error`` is simply another name for the
# HTTPException class
error = HTTPException
823
824
825#
826# snarfed from httplib.py for now...
827#
def test():
    """Test this module.

    Retrieve a page over plain HTTP and display the status code, reason,
    headers and body.  Host and selector default to 'www.python.org' and
    '/'; the first and second command-line arguments override them, and
    each -d option raises the debug level by one.  When the socket module
    has ssl support, '/projects/python' is then fetched from
    'sourceforge.net' through HTTPS and displayed the same way.
    """

    import sys
    import getopt

    def fetch_and_show(client, host, selector, out):
        # One GET round trip; everything is written through out() in the
        # layout: status, reason, blank line, headers, blank line, body.
        client.connect(host)
        client.putrequest('GET', selector)
        client.endheaders()
        status, reason, headers = client.getreply()
        out('status = %s\n' % (status,))
        out('reason = %s\n' % (reason,))
        out('\n')
        if headers:
            for header in headers.headers:
                out('%s\n' % (header.strip(),))
        out('\n')
        out('%s\n' % (client.getfile().read(),))

    opts, args = getopt.getopt(sys.argv[1:], 'd')

    # debug level == number of -d options given
    dl = [flag for flag, value in opts].count('-d')

    host = 'www.python.org'
    selector = '/'
    if len(args) > 0:
        host = args[0]
    if len(args) > 1:
        selector = args[1]

    h = HTTP()
    h.set_debuglevel(dl)
    fetch_and_show(h, host, selector, sys.stdout.write)

    if hasattr(socket, 'ssl'):
        hs = HTTPS()
        fetch_and_show(hs, 'sourceforge.net', '/projects/python',
                       sys.stdout.write)
Guido van Rossum23acc951994-02-21 16:36:04 +0000875
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000876
# executed as a script (not imported): run the self-test
if __name__ == '__main__':
    test()