blob: fb87099ee994066b4c9cad7ec03ff9deb7ad7b69 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Guido van Rossum23acc951994-02-21 16:36:04 +000069import socket
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Guido van Rossum23acc951994-02-21 16:36:04 +000071
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000072try:
Greg Steindd6eefb2000-07-18 09:09:48 +000073 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000074except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000076
# public names picked up by a star-import of this module
__all__ = ["HTTP"]

# default TCP ports: HTTPConnection.default_port is HTTP_PORT and
# HTTPSConnection.default_port is HTTPS_PORT
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder held by the HTTPResponse attributes until begin() has parsed
# the status line and headers
_UNKNOWN = 'UNKNOWN'

# connection states (values of HTTPConnection's private __state; see the
# state table at the end of the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
88
89
90class HTTPResponse:
Jeremy Hylton30f86742000-09-18 22:50:38 +000091 def __init__(self, sock, debuglevel=0):
Greg Steindd6eefb2000-07-18 09:09:48 +000092 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +000093 self.debuglevel = debuglevel
Greg Stein5e0fa402000-06-26 08:28:01 +000094
Greg Steindd6eefb2000-07-18 09:09:48 +000095 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +000096
Greg Steindd6eefb2000-07-18 09:09:48 +000097 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +000098 self.version = _UNKNOWN # HTTP-Version
99 self.status = _UNKNOWN # Status-Code
100 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000101
Tim Peters07e99cb2001-01-14 23:47:14 +0000102 self.chunked = _UNKNOWN # is "chunked" being used?
103 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
104 self.length = _UNKNOWN # number of bytes left in response
105 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000106
Greg Steindd6eefb2000-07-18 09:09:48 +0000107 def begin(self):
108 if self.msg is not None:
109 # we've already started reading the response
110 return
Greg Stein5e0fa402000-06-26 08:28:01 +0000111
Greg Stein5e0fa402000-06-26 08:28:01 +0000112 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000113 if self.debuglevel > 0:
114 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000115 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000116 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000117 except ValueError:
118 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000119 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000120 reason = ""
121 except ValueError:
Jeremy Hylton110941a2000-10-12 19:58:36 +0000122 version = "HTTP/0.9"
123 status = "200"
124 reason = ""
Greg Steindd6eefb2000-07-18 09:09:48 +0000125 if version[:5] != 'HTTP/':
126 self.close()
127 raise BadStatusLine(line)
Greg Stein5e0fa402000-06-26 08:28:01 +0000128
Jeremy Hylton23d40472001-04-13 14:57:08 +0000129 # The status code is a three-digit number
130 try:
131 self.status = status = int(status)
132 if status < 100 or status > 999:
133 raise BadStatusLine(line)
134 except ValueError:
135 raise BadStatusLine(line)
Guido van Rossum34735a62000-12-15 15:09:42 +0000136 self.reason = reason.strip()
Greg Stein5e0fa402000-06-26 08:28:01 +0000137
Greg Steindd6eefb2000-07-18 09:09:48 +0000138 if version == 'HTTP/1.0':
139 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000140 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000141 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000142 elif version == 'HTTP/0.9':
143 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000144 else:
145 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000146
Jeremy Hylton110941a2000-10-12 19:58:36 +0000147 if self.version == 9:
148 self.msg = mimetools.Message(StringIO())
149 return
150
Greg Steindd6eefb2000-07-18 09:09:48 +0000151 self.msg = mimetools.Message(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000152 if self.debuglevel > 0:
153 for hdr in self.msg.headers:
154 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000155
Greg Steindd6eefb2000-07-18 09:09:48 +0000156 # don't let the msg keep an fp
157 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000158
Greg Steindd6eefb2000-07-18 09:09:48 +0000159 # are we using the chunked-style of transfer encoding?
160 tr_enc = self.msg.getheader('transfer-encoding')
161 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000162 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000163 raise UnknownTransferEncoding()
164 self.chunked = 1
165 self.chunk_left = None
166 else:
167 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000168
Greg Steindd6eefb2000-07-18 09:09:48 +0000169 # will the connection close at the end of the response?
170 conn = self.msg.getheader('connection')
171 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000172 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000173 # a "Connection: close" will always close the connection. if we
174 # don't see that and this is not HTTP/1.1, then the connection will
175 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000176 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000177 ( self.version != 11 and \
178 not self.msg.getheader('keep-alive') )
179 else:
180 # for HTTP/1.1, the connection will always remain open
181 # otherwise, it will remain open IFF we see a Keep-Alive header
182 self.will_close = self.version != 11 and \
183 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000184
Greg Steindd6eefb2000-07-18 09:09:48 +0000185 # do we have a Content-Length?
186 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
187 length = self.msg.getheader('content-length')
188 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000189 try:
190 self.length = int(length)
191 except ValueError:
192 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000193 else:
194 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000195
Greg Steindd6eefb2000-07-18 09:09:48 +0000196 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000197 if (status == 204 or # No Content
198 status == 304 or # Not Modified
199 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000200 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000201
Greg Steindd6eefb2000-07-18 09:09:48 +0000202 # if the connection remains open, and we aren't using chunked, and
203 # a content-length was not provided, then assume that the connection
204 # WILL close.
205 if not self.will_close and \
206 not self.chunked and \
207 self.length is None:
208 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000209
Greg Steindd6eefb2000-07-18 09:09:48 +0000210 def close(self):
211 if self.fp:
212 self.fp.close()
213 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000214
Greg Steindd6eefb2000-07-18 09:09:48 +0000215 def isclosed(self):
216 # NOTE: it is possible that we will not ever call self.close(). This
217 # case occurs when will_close is TRUE, length is None, and we
218 # read up to the last byte, but NOT past it.
219 #
220 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
221 # called, meaning self.isclosed() is meaningful.
222 return self.fp is None
223
224 def read(self, amt=None):
225 if self.fp is None:
226 return ''
227
228 if self.chunked:
229 chunk_left = self.chunk_left
230 value = ''
231 while 1:
232 if chunk_left is None:
233 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000234 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000235 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000236 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000237 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000238 if chunk_left == 0:
239 break
240 if amt is None:
241 value = value + self._safe_read(chunk_left)
242 elif amt < chunk_left:
243 value = value + self._safe_read(amt)
244 self.chunk_left = chunk_left - amt
245 return value
246 elif amt == chunk_left:
247 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000248 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000249 self.chunk_left = None
250 return value
251 else:
252 value = value + self._safe_read(chunk_left)
253 amt = amt - chunk_left
254
255 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000256 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000257 chunk_left = None
258
259 # read and discard trailer up to the CRLF terminator
260 ### note: we shouldn't have any trailers!
261 while 1:
262 line = self.fp.readline()
263 if line == '\r\n':
264 break
265
266 # we read everything; close the "file"
267 self.close()
268
269 return value
270
271 elif amt is None:
272 # unbounded read
273 if self.will_close:
274 s = self.fp.read()
275 else:
276 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000277 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000278 return s
279
280 if self.length is not None:
281 if amt > self.length:
282 # clip the read to the "end of response"
283 amt = self.length
284 self.length = self.length - amt
285
286 # we do not use _safe_read() here because this may be a .will_close
287 # connection, and the user is reading more bytes than will be provided
288 # (for example, reading in 1k chunks)
289 s = self.fp.read(amt)
290
Greg Steindd6eefb2000-07-18 09:09:48 +0000291 return s
292
293 def _safe_read(self, amt):
294 """Read the number of bytes requested, compensating for partial reads.
295
296 Normally, we have a blocking socket, but a read() can be interrupted
297 by a signal (resulting in a partial read).
298
299 Note that we cannot distinguish between EOF and an interrupt when zero
300 bytes have been read. IncompleteRead() will be raised in this
301 situation.
302
303 This function should be used when <amt> bytes "should" be present for
304 reading. If the bytes are truly not available (due to EOF), then the
305 IncompleteRead exception can be used to detect the problem.
306 """
307 s = ''
308 while amt > 0:
309 chunk = self.fp.read(amt)
310 if not chunk:
311 raise IncompleteRead(s)
312 s = s + chunk
313 amt = amt - len(chunk)
314 return s
315
316 def getheader(self, name, default=None):
317 if self.msg is None:
318 raise ResponseNotReady()
319 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000320
321
322class HTTPConnection:
323
Greg Steindd6eefb2000-07-18 09:09:48 +0000324 _http_vsn = 11
325 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000326
Greg Steindd6eefb2000-07-18 09:09:48 +0000327 response_class = HTTPResponse
328 default_port = HTTP_PORT
329 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000330 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000331
Greg Steindd6eefb2000-07-18 09:09:48 +0000332 def __init__(self, host, port=None):
333 self.sock = None
334 self.__response = None
335 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000336
Greg Steindd6eefb2000-07-18 09:09:48 +0000337 self._set_hostport(host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000338
Greg Steindd6eefb2000-07-18 09:09:48 +0000339 def _set_hostport(self, host, port):
340 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000341 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000342 if i >= 0:
Guido van Rossumfd97a912001-01-15 14:34:20 +0000343 port = int(host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000344 host = host[:i]
345 else:
346 port = self.default_port
347 self.host = host
348 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000349
Jeremy Hylton30f86742000-09-18 22:50:38 +0000350 def set_debuglevel(self, level):
351 self.debuglevel = level
352
Greg Steindd6eefb2000-07-18 09:09:48 +0000353 def connect(self):
354 """Connect to the host and port specified in __init__."""
355 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000356 if self.debuglevel > 0:
357 print "connect: (%s, %s)" % (self.host, self.port)
Greg Steindd6eefb2000-07-18 09:09:48 +0000358 self.sock.connect((self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000359
Greg Steindd6eefb2000-07-18 09:09:48 +0000360 def close(self):
361 """Close the connection to the HTTP server."""
362 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000363 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000364 self.sock = None
365 if self.__response:
366 self.__response.close()
367 self.__response = None
368 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000369
Greg Steindd6eefb2000-07-18 09:09:48 +0000370 def send(self, str):
371 """Send `str' to the server."""
372 if self.sock is None:
373 if self.auto_open:
374 self.connect()
375 else:
376 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000377
Greg Steindd6eefb2000-07-18 09:09:48 +0000378 # send the data to the server. if we get a broken pipe, then close
379 # the socket. we want to reconnect when somebody tries to send again.
380 #
381 # NOTE: we DO propagate the error, though, because we cannot simply
382 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000383 if self.debuglevel > 0:
384 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000385 try:
386 self.sock.send(str)
387 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000388 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000389 self.close()
390 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000391
Greg Steindd6eefb2000-07-18 09:09:48 +0000392 def putrequest(self, method, url):
393 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000394
Greg Steindd6eefb2000-07-18 09:09:48 +0000395 `method' specifies an HTTP request method, e.g. 'GET'.
396 `url' specifies the object being requested, e.g. '/index.html'.
397 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000398
Greg Steindd6eefb2000-07-18 09:09:48 +0000399 # check if a prior response has been completed
400 if self.__response and self.__response.isclosed():
401 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000402
Greg Steindd6eefb2000-07-18 09:09:48 +0000403 #
404 # in certain cases, we cannot issue another request on this connection.
405 # this occurs when:
406 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
407 # 2) a response to a previous request has signalled that it is going
408 # to close the connection upon completion.
409 # 3) the headers for the previous response have not been read, thus
410 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
411 #
412 # if there is no prior response, then we can request at will.
413 #
414 # if point (2) is true, then we will have passed the socket to the
415 # response (effectively meaning, "there is no prior response"), and
416 # will open a new one when a new request is made.
417 #
418 # Note: if a prior response exists, then we *can* start a new request.
419 # We are not allowed to begin fetching the response to this new
420 # request, however, until that prior response is complete.
421 #
422 if self.__state == _CS_IDLE:
423 self.__state = _CS_REQ_STARTED
424 else:
425 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000426
Greg Steindd6eefb2000-07-18 09:09:48 +0000427 if not url:
428 url = '/'
429 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000430
Greg Steindd6eefb2000-07-18 09:09:48 +0000431 try:
432 self.send(str)
433 except socket.error, v:
434 # trap 'Broken pipe' if we're allowed to automatically reconnect
435 if v[0] != 32 or not self.auto_open:
436 raise
437 # try one more time (the socket was closed; this will reopen)
438 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000439
Greg Steindd6eefb2000-07-18 09:09:48 +0000440 if self._http_vsn == 11:
441 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000442
Greg Steindd6eefb2000-07-18 09:09:48 +0000443 # this header is issued *only* for HTTP/1.1 connections. more
444 # specifically, this means it is only issued when the client uses
445 # the new HTTPConnection() class. backwards-compat clients will
446 # be using HTTP/1.0 and those clients may be issuing this header
447 # themselves. we should NOT issue it twice; some web servers (such
448 # as Apache) barf when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000449
450 # if we need a non-standard port,include it in the header
451 if self.port == HTTP_PORT:
452 self.putheader('Host', self.host)
453 else:
454 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000455
Greg Steindd6eefb2000-07-18 09:09:48 +0000456 # note: we are assuming that clients will not attempt to set these
457 # headers since *this* library must deal with the
458 # consequences. this also means that when the supporting
459 # libraries are updated to recognize other forms, then this
460 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000461
Greg Steindd6eefb2000-07-18 09:09:48 +0000462 # we only want a Content-Encoding of "identity" since we don't
463 # support encodings such as x-gzip or x-deflate.
464 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000465
Greg Steindd6eefb2000-07-18 09:09:48 +0000466 # we can accept "chunked" Transfer-Encodings, but no others
467 # NOTE: no TE header implies *only* "chunked"
468 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000469
Greg Steindd6eefb2000-07-18 09:09:48 +0000470 # if TE is supplied in the header, then it must appear in a
471 # Connection header.
472 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000473
Greg Steindd6eefb2000-07-18 09:09:48 +0000474 else:
475 # For HTTP/1.0, the server will assume "not chunked"
476 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000477
Greg Steindd6eefb2000-07-18 09:09:48 +0000478 def putheader(self, header, value):
479 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000480
Greg Steindd6eefb2000-07-18 09:09:48 +0000481 For example: h.putheader('Accept', 'text/html')
482 """
483 if self.__state != _CS_REQ_STARTED:
484 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000485
Greg Steindd6eefb2000-07-18 09:09:48 +0000486 str = '%s: %s\r\n' % (header, value)
487 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000488
Greg Steindd6eefb2000-07-18 09:09:48 +0000489 def endheaders(self):
490 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000491
Greg Steindd6eefb2000-07-18 09:09:48 +0000492 if self.__state == _CS_REQ_STARTED:
493 self.__state = _CS_REQ_SENT
494 else:
495 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000496
Greg Steindd6eefb2000-07-18 09:09:48 +0000497 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000498
Greg Steindd6eefb2000-07-18 09:09:48 +0000499 def request(self, method, url, body=None, headers={}):
500 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000501
Greg Steindd6eefb2000-07-18 09:09:48 +0000502 try:
503 self._send_request(method, url, body, headers)
504 except socket.error, v:
505 # trap 'Broken pipe' if we're allowed to automatically reconnect
506 if v[0] != 32 or not self.auto_open:
507 raise
508 # try one more time
509 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000510
Greg Steindd6eefb2000-07-18 09:09:48 +0000511 def _send_request(self, method, url, body, headers):
512 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000513
Greg Steindd6eefb2000-07-18 09:09:48 +0000514 if body:
515 self.putheader('Content-Length', str(len(body)))
516 for hdr, value in headers.items():
517 self.putheader(hdr, value)
518 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000519
Greg Steindd6eefb2000-07-18 09:09:48 +0000520 if body:
521 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000522
Greg Steindd6eefb2000-07-18 09:09:48 +0000523 def getresponse(self):
524 "Get the response from the server."
Greg Stein5e0fa402000-06-26 08:28:01 +0000525
Greg Steindd6eefb2000-07-18 09:09:48 +0000526 # check if a prior response has been completed
527 if self.__response and self.__response.isclosed():
528 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000529
Greg Steindd6eefb2000-07-18 09:09:48 +0000530 #
531 # if a prior response exists, then it must be completed (otherwise, we
532 # cannot read this response's header to determine the connection-close
533 # behavior)
534 #
535 # note: if a prior response existed, but was connection-close, then the
536 # socket and response were made independent of this HTTPConnection
537 # object since a new request requires that we open a whole new
538 # connection
539 #
540 # this means the prior response had one of two states:
541 # 1) will_close: this connection was reset and the prior socket and
542 # response operate independently
543 # 2) persistent: the response was retained and we await its
544 # isclosed() status to become true.
545 #
546 if self.__state != _CS_REQ_SENT or self.__response:
547 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000548
Jeremy Hylton30f86742000-09-18 22:50:38 +0000549 if self.debuglevel > 0:
550 response = self.response_class(self.sock, self.debuglevel)
551 else:
552 response = self.response_class(self.sock)
Greg Stein5e0fa402000-06-26 08:28:01 +0000553
Greg Steindd6eefb2000-07-18 09:09:48 +0000554 response.begin()
555 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000556
Greg Steindd6eefb2000-07-18 09:09:48 +0000557 if response.will_close:
558 # this effectively passes the connection to the response
559 self.close()
560 else:
561 # remember this, so we can tell when it is complete
562 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000563
Greg Steindd6eefb2000-07-18 09:09:48 +0000564 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000565
566
class FakeSocket:
    """Pair a socket with its SSL session behind a socket-like interface.

    send(), recv() and makefile() go through the SSL session object; every
    other attribute is looked up on the wrapped socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        Only the modes 'r' and 'rb' are supported (anything else raises
        UnimplementedFileMode) and the bufsize argument is ignored.

        The SSL session is drained until it reports an empty read or an
        SSL error, so the returned object contains *all* of the data.
        """
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()

        received = []
        draining = 1
        while draining:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                # an SSL error ends the stream just like an empty read
                data = ''
            if data == '':
                draining = 0
            else:
                received.append(data)
        return StringIO("".join(received))

    def send(self, stuff, flags = 0):
        """Write `stuff' through the SSL session; `flags' is ignored."""
        written = self.__ssl.write(stuff)
        return written

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes from the SSL session; `flags' is ignored."""
        data = self.__ssl.read(len)
        return data

    def __getattr__(self, attr):
        # anything not defined here is delegated to the wrapped socket
        wrapped = self.__sock
        return getattr(wrapped, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000603
Guido van Rossum23acc951994-02-21 16:36:04 +0000604
class HTTPSConnection(HTTPConnection):
    "HTTPConnection variant that talks to the server over SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Set up like HTTPConnection, remembering optional SSL files.

        The only keyword arguments accepted are key_file and cert_file;
        any other keyword raises IllegalKeywordArgument.
        """
        for option in x509.keys():
            if option not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Open a TCP connection to host/port and start SSL on it."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # hand socket.ssl() the wrapped low-level socket when this socket
        # object exposes one as _sock, otherwise the socket itself
        if hasattr(plain, "_sock"):
            lowlevel = plain._sock
        else:
            lowlevel = plain
        session = socket.ssl(lowlevel, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, session)
Greg Stein5e0fa402000-06-26 08:28:01 +0000636
637
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000638class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000639 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000640
Greg Steindd6eefb2000-07-18 09:09:48 +0000641 _http_vsn = 10
642 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000643
Greg Steindd6eefb2000-07-18 09:09:48 +0000644 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000645
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000646 _connection_class = HTTPConnection
647
Greg Steindd6eefb2000-07-18 09:09:48 +0000648 def __init__(self, host='', port=None, **x509):
649 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000650
Greg Steindd6eefb2000-07-18 09:09:48 +0000651 # some joker passed 0 explicitly, meaning default port
652 if port == 0:
653 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000654
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 # Note that we may pass an empty string as the host; this will throw
656 # an error when we attempt to connect. Presumably, the client code
657 # will call connect before then, with a proper host.
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000658 self._conn = self._connection_class(host, port)
659 # set up delegation to flesh out interface
660 self.send = self._conn.send
661 self.putrequest = self._conn.putrequest
662 self.endheaders = self._conn.endheaders
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000663 self._conn._http_vsn = self._http_vsn
664 self._conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000665
Greg Steindd6eefb2000-07-18 09:09:48 +0000666 # we never actually use these for anything, but we keep them here for
667 # compatibility with post-1.5.2 CVS.
668 self.key_file = x509.get('key_file')
669 self.cert_file = x509.get('cert_file')
Greg Stein5e0fa402000-06-26 08:28:01 +0000670
Greg Steindd6eefb2000-07-18 09:09:48 +0000671 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000672
Greg Steindd6eefb2000-07-18 09:09:48 +0000673 def connect(self, host=None, port=None):
674 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000675
Greg Steindd6eefb2000-07-18 09:09:48 +0000676 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000677 self._conn._set_hostport(host, port)
678 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
Greg Steindd6eefb2000-07-18 09:09:48 +0000680 def set_debuglevel(self, debuglevel):
Jeremy Hylton30f86742000-09-18 22:50:38 +0000681 self._conn.set_debuglevel(debuglevel)
Greg Stein5e0fa402000-06-26 08:28:01 +0000682
Greg Steindd6eefb2000-07-18 09:09:48 +0000683 def getfile(self):
684 "Provide a getfile, since the superclass' does not use this concept."
685 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000686
Greg Steindd6eefb2000-07-18 09:09:48 +0000687 def putheader(self, header, *values):
688 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000689 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000690
Greg Steindd6eefb2000-07-18 09:09:48 +0000691 def getreply(self):
692 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000693
Greg Steindd6eefb2000-07-18 09:09:48 +0000694 Returns a tuple consisting of:
695 - server status code (e.g. '200' if all goes well)
696 - server "reason" corresponding to status code
697 - any RFC822 headers in the response from the server
698 """
699 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000700 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000701 except BadStatusLine, e:
702 ### hmm. if getresponse() ever closes the socket on a bad request,
703 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000704
Greg Steindd6eefb2000-07-18 09:09:48 +0000705 ### should we keep this behavior? do people use it?
706 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000707 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000708
Greg Steindd6eefb2000-07-18 09:09:48 +0000709 # close our socket -- we want to restart after any protocol error
710 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000711
Greg Steindd6eefb2000-07-18 09:09:48 +0000712 self.headers = None
713 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000714
Greg Steindd6eefb2000-07-18 09:09:48 +0000715 self.headers = response.msg
716 self.file = response.fp
717 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000718
def close(self):
    "Close the wrapped connection and drop our reference to the file."
    conn = self._conn
    conn.close()

    # After a normal reply self.file is response.fp, which the wrapped
    # connection is expected to close itself, so only the reference is
    # cleared here.
    ### Messy: after a status of -1 the file is owned by us.  It is not
    ### closed explicitly; losing the last reference is relied upon to
    ### do that.
    self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000728
# HTTPS is only defined when the socket module exposes an 'ssl' attribute.
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        # The only override relative to HTTP.
        # NOTE(review): presumably HTTP.__init__ instantiates
        # _connection_class; that code is outside this section -- confirm.
        _connection_class = HTTPSConnection
Tim Peters5ceadc82001-01-13 19:16:21 +0000739
Greg Stein5e0fa402000-06-26 08:28:01 +0000740
class HTTPException(Exception):
    "Common base class of the exceptions defined in this module."
Greg Stein5e0fa402000-06-26 08:28:01 +0000743
class NotConnected(HTTPException):
    """HTTPException subclass that adds no state of its own.

    NOTE(review): going by the name, signals use of a connection that is
    not open; the raise sites are outside this section -- confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000746
class UnknownProtocol(HTTPException):
    """HTTPException subclass that remembers the rejected version value.

    The constructor argument is available as the 'version' attribute and
    is also stored in 'args'.
    """
    def __init__(self, version):
        # Populate args explicitly: the base initialiser is not called,
        # and str(exc) / tracebacks take their text from args.
        self.args = (version,)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +0000750
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no state of its own.

    NOTE(review): going by the name, reports a transfer encoding this
    module does not handle; raise sites are outside this section.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000753
class IllegalKeywordArgument(HTTPException):
    """HTTPException subclass that adds no state of its own.

    NOTE(review): going by the name, reports an unsupported keyword
    argument; raise sites are outside this section -- confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000756
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no state of its own.

    NOTE(review): going by the name, reports a file mode this module
    does not support; raise sites are outside this section -- confirm.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000759
class IncompleteRead(HTTPException):
    """HTTPException subclass that carries the data obtained so far.

    The constructor argument is available as the 'partial' attribute and
    is also stored in 'args'.
    """
    def __init__(self, partial):
        # Populate args explicitly: the base initialiser is not called,
        # and str(exc) / tracebacks take their text from args.
        self.args = (partial,)
        self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +0000763
class ImproperConnectionState(HTTPException):
    """HTTPException subclass that adds no state of its own.

    Base class of CannotSendRequest, CannotSendHeader and
    ResponseNotReady.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000766
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state of its own.

    NOTE(review): going by the name, raised when a request may not be
    started in the current state; raise sites are outside this section.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000769
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state of its own.

    NOTE(review): going by the name, raised when a header may not be
    sent in the current state; raise sites are outside this section.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000772
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state of its own.

    NOTE(review): going by the name, raised when a response is asked for
    before it can be retrieved; raise sites are outside this section.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +0000775
class BadStatusLine(HTTPException):
    """HTTPException subclass that carries the rejected status line.

    The constructor argument is available as the 'line' attribute and is
    also stored in 'args'.
    """
    def __init__(self, line):
        # Populate args explicitly: the base initialiser is not called,
        # and str(exc) / tracebacks take their text from args.
        self.args = (line,)
        self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +0000779
# for backwards compatibility: 'error' is kept as an alias of HTTPException
error = HTTPException
782
783
784#
785# snarfed from httplib.py for now...
786#
Guido van Rossum23acc951994-02-21 16:36:04 +0000787def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000788 """Test this module.
789
790 The test consists of retrieving and displaying the Python
791 home page, along with the error code and error string returned
792 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000793 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000794
Guido van Rossum41999c11997-12-09 00:12:23 +0000795 import sys
796 import getopt
797 opts, args = getopt.getopt(sys.argv[1:], 'd')
798 dl = 0
799 for o, a in opts:
800 if o == '-d': dl = dl + 1
801 host = 'www.python.org'
802 selector = '/'
803 if args[0:]: host = args[0]
804 if args[1:]: selector = args[1]
805 h = HTTP()
806 h.set_debuglevel(dl)
807 h.connect(host)
808 h.putrequest('GET', selector)
809 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000810 status, reason, headers = h.getreply()
811 print 'status =', status
812 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000813 print
814 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000815 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +0000816 print
817 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000818
819 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000820 host = 'sourceforge.net'
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000821 selector = '/projects/python'
Greg Steindd6eefb2000-07-18 09:09:48 +0000822 hs = HTTPS()
823 hs.connect(host)
824 hs.putrequest('GET', selector)
825 hs.endheaders()
826 status, reason, headers = hs.getreply()
827 print 'status =', status
828 print 'reason =', reason
829 print
830 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000831 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000832 print
833 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000834
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000835
# run the self-test when this file is executed as a script
if __name__ == '__main__':
    test()