blob: fa63787ef6dc44c5242bccc438e5cc67c2d47b69 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
Greg Stein5e0fa402000-06-26 08:28:01 +00006HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Guido van Rossum23acc951994-02-21 16:36:04 +000069import socket
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Guido van Rossum23acc951994-02-21 16:36:04 +000071
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000072try:
Greg Steindd6eefb2000-07-18 09:09:48 +000073 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000074except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000076
# Names exported by "from httplib import *".  The order is kept exactly as
# it was; the entries are merely listed one per line and grouped by role.
__all__ = [
    # connection / response classes
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPSConnection",
    # exceptions
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "IllegalKeywordArgument",
    "UnimplementedFileMode",
    "IncompleteRead",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
]
Skip Montanaro2dd42762001-01-23 15:35:05 +000083
# Default TCP ports for plain and SSL connections.
HTTP_PORT, HTTPS_PORT = 80, 443

# Placeholder for response attributes that have not been determined yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
93
94
class HTTPResponse:
    """A single HTTP response read from a socket.

    The object wraps an unbuffered file made from the socket.  begin()
    parses the status line and headers and works out how the body is
    delimited (chunked, Content-Length, or "until the connection closes");
    read() then returns the body, closing the wrapped file once the whole
    body has been consumed.

    Attributes set by begin():
      msg        -- mimetools.Message holding the response headers
      version    -- 9, 10 or 11
      status     -- integer Status-Code
      reason     -- Reason-Phrase with surrounding whitespace stripped
      chunked    -- true if the body uses the "chunked" transfer coding
      chunk_left -- bytes left in the current chunk (None: between chunks)
      length     -- body bytes left to read, or None if unknown
      will_close -- true if the server closes the connection afterwards
    """

    def __init__(self, sock, debuglevel=0):
        """Wrap `sock'; nothing is read until begin() is called."""
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN     # HTTP-Version
        self.status = _UNKNOWN      # Status-Code
        self.reason = _UNKNOWN      # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def begin(self):
        """Read and parse the status line and the response headers.

        Calling begin() again after the headers were parsed is a no-op.

        Raises BadStatusLine for a malformed status line, UnknownProtocol
        for an unsupported HTTP version and UnknownTransferEncoding for a
        Transfer-Encoding other than "chunked".
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: %s" % repr(line))
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # fewer than two fields: assume a header-less HTTP/0.9 reply
                version = "HTTP/0.9"
                status = "200"
                reason = ""
        if version[:5] != 'HTTP/':
            self.close()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            self.status = status = int(status)
        except ValueError:
            raise BadStatusLine(line)
        if status < 100 or status > 999:
            raise BadStatusLine(line)
        self.reason = reason.strip()

        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # HTTP/0.9 has no headers: the body is not chunked, has no
            # known length and ends when the server closes the connection.
            # These must be set explicitly; the _UNKNOWN placeholder is a
            # true value and would send read() down the chunked path.
            self.chunked = 0
            self.chunk_left = None
            self.length = None
            self.will_close = 1
            self.msg = mimetools.Message(StringIO())
            return

        self.msg = mimetools.Message(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                # hdr carries its own line terminator; print adds a newline
                print("header: %s" % hdr.rstrip("\r\n"))

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc:
            if tr_enc.lower() != 'chunked':
                raise UnknownTransferEncoding()
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        #
        # a "Connection: close" will always close the connection.  if we
        # don't see that and this is not HTTP/1.1, then the connection will
        # close unless we see a Keep-Alive header.  for HTTP/1.1 the
        # connection otherwise remains open.
        implicit_close = self.version != 11 and \
                         not self.msg.getheader('keep-alive')
        conn = self.msg.getheader('connection')
        if conn and conn.lower().find('close') != -1:
            self.will_close = 1
        else:
            self.will_close = implicit_close

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == 204 or            # No Content
            status == 304 or            # Not Modified
            100 <= status < 200):       # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def close(self):
        """Close the wrapped file, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return true once the wrapped file has been closed."""
        # NOTE: a response whose end is only marked by the server closing
        #       the connection is closed by read() when an unbounded read
        #       finishes or a sized read hits EOF; a caller that stops
        #       reading exactly at the last byte never triggers that.
        return self.fp is None

    def read(self, amt=None):
        """Return up to `amt' bytes of the body (all of it if amt is None).

        Returns '' once the response has been closed.  The response is
        closed as soon as the complete body has been read.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if not s and amt:
            # EOF before the requested bytes arrived: nothing more will come
            self.close()
        if self.length is not None:
            # account for what was actually read, which may be less than amt
            self.length = self.length - len(s)
            if not self.length:
                # body complete; release the connection for the next request
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read from a "chunked" body; same contract as read()."""
        chunk_left = self.chunk_left
        parts = []
        while 1:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt = amt - chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while 1:
            line = self.fp.readline()
            # an empty string means EOF: stop instead of looping forever
            # when the server closes without sending the final CRLF
            if not line or line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        parts = []
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(''.join(parts))
            parts.append(chunk)
            amt = amt - len(chunk)
        return ''.join(parts)

    def getheader(self, name, default=None):
        """Return the value of header `name', or `default' if absent.

        Raises ResponseNotReady if begin() has not parsed the headers yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000325
326
class HTTPConnection:
    """One client connection to an HTTP server.

    A request is issued either step by step with putrequest(),
    putheader() and endheaders(), or in one call with request(); the reply
    is then obtained with getresponse().  The object tracks where it is in
    that cycle and raises CannotSendRequest, CannotSendHeader or
    ResponseNotReady when a method is called out of order.

    Class attributes that may be overridden:
      response_class -- class instantiated by getresponse()
      default_port   -- port used when none is given
      auto_open      -- if true, send() connects on demand
      debuglevel     -- values > 0 print a trace of the traffic
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0

    def __init__(self, host, port=None):
        """Remember the target; no connection is opened yet.

        `host' may carry the port as "host:port" when `port' is None.
        """
        self.sock = None
        self.__response = None
        self.__state = _CS_IDLE

        self._set_hostport(host, port)

    def _set_hostport(self, host, port):
        """Store host and port, splitting "host:port" if port is None.

        A bracketed IPv6 literal ("[::1]" or "[::1]:8080") is accepted;
        the brackets are removed from the stored host.  A non-numeric port
        raises ValueError.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')     # IPv6 literals are written as [...]
            if i > j:
                # the last colon lies outside any brackets: it starts the port
                port = int(host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        if host and host[0] == '[' and host[-1] == ']':
            host = host[1:-1]
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug level; values > 0 print a trace of the traffic."""
        self.debuglevel = level

    def connect(self):
        """Connect to the host and port specified in __init__.

        Every address returned by getaddrinfo() is tried in turn.  If none
        can be connected, socket.error is raised (the error of the last
        attempt, or a message if getaddrinfo() returned nothing).
        """
        last_error = "getaddrinfo returns an empty list"
        for res in socket.getaddrinfo(self.host, self.port, 0,
                                      socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                self.sock = socket.socket(af, socktype, proto)
                if self.debuglevel > 0:
                    print("connect: (%s, %s)" % (self.host, self.port))
                self.sock.connect(sa)
            except socket.error as exc:
                last_error = exc
                if self.debuglevel > 0:
                    print('connect fail: %s' % ((self.host, self.port),))
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            if isinstance(last_error, socket.error):
                raise last_error
            raise socket.error(last_error)

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server.

        Connects first if there is no socket and auto_open is true;
        otherwise raises NotConnected.  socket.error is propagated; on a
        broken pipe the connection is closed first so that the next send
        reconnects.
        """
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: %s" % repr(str))
        try:
            # socket.send() may accept only part of the data, so keep going
            # until everything has been handed over.
            data = str
            while data:
                sent = self.sock.send(data)
                if sent is None:
                    # socket-like object that does not report a count;
                    # treat the data as fully sent
                    break
                data = data[sent:]
        except socket.error as v:
            if v.args[0] == 32:     # Broken pipe
                self.close()
            raise

    def putrequest(self, method, url):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.

        Raises CannotSendRequest unless the connection is idle.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        if not url:
            url = '/'
        request_line = '%s %s %s\r\n' % (method, url, self._http_vsn_str)

        try:
            self.send(request_line)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if v.args[0] != 32 or not self.auto_open:
                raise
            # try one more time (the socket was closed; this will reopen)
            self.send(request_line)
            # send() called close() on the broken pipe, which put the state
            # back to idle; restore it so the headers below can be sent
            self.__state = _CS_REQ_STARTED

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            # this header is issued *only* for HTTP/1.1 connections. more
            # specifically, this means it is only issued when the client uses
            # the new HTTPConnection() class. backwards-compat clients will
            # be using HTTP/1.0 and those clients may be issuing this header
            # themselves. we should NOT issue it twice; some web servers (such
            # as Apache) barf when they see two Host: headers

            # an IPv6 literal has to be bracketed again inside the header
            host = self.host
            if host.find(':') >= 0:
                host = '[' + host + ']'

            # if we need a non-standard port, include it in the header.
            # "standard" is this class's own default port, so a subclass
            # with another default is handled correctly too.
            if self.port == self.default_port:
                self.putheader('Host', host)
            else:
                self.putheader('Host', "%s:%s" % (host, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked", so none is sent.
            #       if TE were supplied it would also have to be named in
            #       a Connection header.

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started with
        putrequest() and its headers are still open.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        self.send('%s: %s\r\n' % (header, value))

    def endheaders(self):
        """Indicate that the last header line has been sent to the server.

        Raises CannotSendHeader unless a request has been started.
        """

        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()

        self.send('\r\n')

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        `headers' maps header names to values; it is only read, never
        modified.  If the first attempt fails with a broken pipe and
        auto_open is true, the whole request is sent once more.
        """

        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if v.args[0] != 32 or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Send request line, headers and (if non-empty) the body."""
        self.putrequest(method, url)

        if body:
            self.putheader('Content-Length', str(len(body)))
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Returns a response_class instance whose headers have been read.
        Raises ResponseNotReady if no request has been completely sent, or
        if an earlier response on this connection is still unread.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel)
        else:
            response = self.response_class(self.sock)

        response.begin()
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000584
585
class FakeSocket:
    """Socket look-alike that routes traffic through an SSL object.

    send() and recv() go to the SSL object's write() and read(); any other
    attribute is looked up on the wrapped socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        Only the modes 'r' and 'rb' are supported (anything else raises
        UnimplementedFileMode) and bufsize is ignored.

        The SSL object is read until it returns an empty string or raises
        socket.sslerror, so the returned object holds *all* of the data.
        """
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()

        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                # treated the same as end of data
                break
            if data == '':
                break
            pieces.append(data)
        return StringIO("".join(pieces))

    def send(self, stuff, flags = 0):
        """Write `stuff' to the SSL object; `flags' is ignored."""
        written = self.__ssl.write(stuff)
        return written

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes from the SSL object; `flags' is ignored."""
        received = self.__ssl.read(len)
        return received

    def __getattr__(self, attr):
        # anything not defined here is answered by the wrapped socket
        return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000622
Guido van Rossum23acc951994-02-21 16:36:04 +0000623
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Set up the connection.

        The only keyword arguments accepted are `key_file' and
        `cert_file'; any other raises IllegalKeywordArgument.  Both are
        stored (None when omitted) and handed to socket.ssl() by connect().
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # Reuse the parent's getaddrinfo() loop so that every address
        # family it returns is tried, instead of hard-coding AF_INET.
        HTTPConnection.connect(self)
        sock = self.sock
        # Hide the plain socket until the SSL layer exists, so that
        # nothing can be sent unencrypted if the setup below fails.
        self.sock = None
        try:
            # socket.ssl() is given the underlying socket object when the
            # socket is a wrapper exposing one as `_sock'
            realsock = getattr(sock, "_sock", sock)
            ssl = socket.ssl(realsock, self.key_file, self.cert_file)
        except:
            # do not leak the TCP connection; the error is re-raised
            sock.close()
            raise
        self.sock = FakeSocket(sock, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
656
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000657class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000659
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 _http_vsn = 10
661 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000664
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000665 _connection_class = HTTPConnection
666
Greg Stein81937a42001-08-18 09:20:23 +0000667 def __init__(self, host='', port=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000668 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000669
Greg Steindd6eefb2000-07-18 09:09:48 +0000670 # some joker passed 0 explicitly, meaning default port
671 if port == 0:
672 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 # Note that we may pass an empty string as the host; this will throw
675 # an error when we attempt to connect. Presumably, the client code
676 # will call connect before then, with a proper host.
Greg Stein81937a42001-08-18 09:20:23 +0000677 self._setup(self._connection_class(host, port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000678
Greg Stein81937a42001-08-18 09:20:23 +0000679 def _setup(self, conn):
680 self._conn = conn
681
682 # set up delegation to flesh out interface
683 self.send = conn.send
684 self.putrequest = conn.putrequest
685 self.endheaders = conn.endheaders
686 self.set_debuglevel = conn.set_debuglevel
687
688 conn._http_vsn = self._http_vsn
689 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000690
Greg Steindd6eefb2000-07-18 09:09:48 +0000691 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000692
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 def connect(self, host=None, port=None):
694 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000695
Greg Steindd6eefb2000-07-18 09:09:48 +0000696 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000697 self._conn._set_hostport(host, port)
698 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000699
def getfile(self):
    """Return the current value of self.file.

    This accessor exists only on the compatibility wrapper; the
    underlying connection class has no such concept.
    """
    current = self.file
    return current
Greg Stein5e0fa402000-06-26 08:28:01 +0000703
def putheader(self, header, *values):
    """Send one header, accepting any number of value arguments.

    The underlying connection's putheader() is called with a single
    value, so the values are joined with CR LF TAB into one string first.
    """
    folded = '\r\n\t'.join(values)
    self._conn.putheader(header, folded)
Greg Stein5e0fa402000-06-26 08:28:01 +0000707
Greg Steindd6eefb2000-07-18 09:09:48 +0000708 def getreply(self):
709 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000710
Greg Steindd6eefb2000-07-18 09:09:48 +0000711 Returns a tuple consisting of:
712 - server status code (e.g. '200' if all goes well)
713 - server "reason" corresponding to status code
714 - any RFC822 headers in the response from the server
715 """
716 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000717 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000718 except BadStatusLine, e:
719 ### hmm. if getresponse() ever closes the socket on a bad request,
720 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000721
Greg Steindd6eefb2000-07-18 09:09:48 +0000722 ### should we keep this behavior? do people use it?
723 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000724 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000725
Greg Steindd6eefb2000-07-18 09:09:48 +0000726 # close our socket -- we want to restart after any protocol error
727 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000728
Greg Steindd6eefb2000-07-18 09:09:48 +0000729 self.headers = None
730 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000731
Greg Steindd6eefb2000-07-18 09:09:48 +0000732 self.headers = response.msg
733 self.file = response.fp
734 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000735
def close(self):
    """Close the underlying connection and forget self.file."""
    conn = self._conn
    conn.close()

    # After a normal reply self.file is the response's fp, which the
    # original note says is closed by the connection class; only the
    # reference needs clearing here.
    ### Messy case: after a -1 status the file belongs to this object.
    ### It is not closed explicitly -- dropping the reference is relied
    ### on to do that.
    self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000745
# Only define the HTTPS wrapper when the socket module offers 'ssl'.
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 had no HTTPS class, but the interface it defined for
        sending http requests is just as useful for https, so the same
        wrapper is offered here on top of HTTPSConnection.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            """Build the wrapper, forwarding X509 keyword info.

            host defaults to '' and a port of 0 is treated as None.  Any
            extra keyword arguments are passed through unchanged to the
            connection class.
            """
            # urf. compensate for bad input.
            if port == 0:
                port = None

            conn = self._connection_class(host, port, **x509)
            self._setup(conn)

            # Never used for anything here; kept only for compatibility
            # with post-1.5.2 CVS.
            self.key_file = x509.get('key_file')
            self.cert_file = x509.get('cert_file')
769
Greg Stein5e0fa402000-06-26 08:28:01 +0000770
class HTTPException(Exception):
    """Root of the exception hierarchy defined in this module."""


class NotConnected(HTTPException):
    pass


class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in self.version."""

    def __init__(self, version):
        # Initialise the base exception as well, so that args is set and
        # str() of the exception shows the value instead of nothing.
        HTTPException.__init__(self, version)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    pass


class IllegalKeywordArgument(HTTPException):
    pass


class UnimplementedFileMode(HTTPException):
    pass


class IncompleteRead(HTTPException):
    """Carries the data passed by the raiser in self.partial."""

    def __init__(self, partial):
        # See UnknownProtocol: keep args in step with the attribute.
        HTTPException.__init__(self, partial)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Base class for CannotSendRequest, CannotSendHeader and
    ResponseNotReady.

    NOTE(review): presumably these signal violations of the connection
    state machine described in the module docstring -- confirm against
    HTTPConnection.
    """


class CannotSendRequest(ImproperConnectionState):
    pass


class CannotSendHeader(ImproperConnectionState):
    pass


class ResponseNotReady(ImproperConnectionState):
    pass


class BadStatusLine(HTTPException):
    """Carries the offending status line in self.line."""

    def __init__(self, line):
        # See UnknownProtocol: keep args in step with the attribute.
        HTTPException.__init__(self, line)
        self.line = line


# for backwards compatibility
error = HTTPException
812
813
814#
815# snarfed from httplib.py for now...
816#
Guido van Rossum23acc951994-02-21 16:36:04 +0000817def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000818 """Test this module.
819
820 The test consists of retrieving and displaying the Python
821 home page, along with the error code and error string returned
822 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000823 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000824
Guido van Rossum41999c11997-12-09 00:12:23 +0000825 import sys
826 import getopt
827 opts, args = getopt.getopt(sys.argv[1:], 'd')
828 dl = 0
829 for o, a in opts:
830 if o == '-d': dl = dl + 1
831 host = 'www.python.org'
832 selector = '/'
833 if args[0:]: host = args[0]
834 if args[1:]: selector = args[1]
835 h = HTTP()
836 h.set_debuglevel(dl)
837 h.connect(host)
838 h.putrequest('GET', selector)
839 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000840 status, reason, headers = h.getreply()
841 print 'status =', status
842 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000843 print
844 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000845 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +0000846 print
847 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000848
849 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000850 host = 'sourceforge.net'
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000851 selector = '/projects/python'
Greg Steindd6eefb2000-07-18 09:09:48 +0000852 hs = HTTPS()
853 hs.connect(host)
854 hs.putrequest('GET', selector)
855 hs.endheaders()
856 status, reason, headers = hs.getreply()
857 print 'status =', status
858 print 'reason =', reason
859 print
860 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000861 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000862 print
863 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000864
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000865
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()