blob: f299873583e06d309666b3eac746390b6e798bde [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
HTTPConnection objects go through a number of "states", which define when a
client may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000073
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000074try:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000076except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000077 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000078
# public names exported by a star-import of this module
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "IllegalKeywordArgument",
           "UnimplementedFileMode", "IncompleteRead",
           "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader",
           "ResponseNotReady", "BadStatusLine", "error"]

# default TCP ports used by HTTPConnection and HTTPSConnection
HTTP_PORT = 80
HTTPS_PORT = 443

# initial value of HTTPResponse attributes that have not been determined yet
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
95
96
97class HTTPResponse:
Jeremy Hylton30f86742000-09-18 22:50:38 +000098 def __init__(self, sock, debuglevel=0):
Greg Steindd6eefb2000-07-18 09:09:48 +000099 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000100 self.debuglevel = debuglevel
Greg Stein5e0fa402000-06-26 08:28:01 +0000101
Greg Steindd6eefb2000-07-18 09:09:48 +0000102 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000103
Greg Steindd6eefb2000-07-18 09:09:48 +0000104 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000105 self.version = _UNKNOWN # HTTP-Version
106 self.status = _UNKNOWN # Status-Code
107 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000108
Tim Peters07e99cb2001-01-14 23:47:14 +0000109 self.chunked = _UNKNOWN # is "chunked" being used?
110 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
111 self.length = _UNKNOWN # number of bytes left in response
112 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000113
Greg Steindd6eefb2000-07-18 09:09:48 +0000114 def begin(self):
115 if self.msg is not None:
116 # we've already started reading the response
117 return
Greg Stein5e0fa402000-06-26 08:28:01 +0000118
Greg Stein5e0fa402000-06-26 08:28:01 +0000119 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000120 if self.debuglevel > 0:
121 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000122 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000123 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000124 except ValueError:
125 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000126 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000127 reason = ""
128 except ValueError:
Jeremy Hylton110941a2000-10-12 19:58:36 +0000129 version = "HTTP/0.9"
130 status = "200"
131 reason = ""
Greg Steindd6eefb2000-07-18 09:09:48 +0000132 if version[:5] != 'HTTP/':
133 self.close()
134 raise BadStatusLine(line)
Greg Stein5e0fa402000-06-26 08:28:01 +0000135
Jeremy Hylton23d40472001-04-13 14:57:08 +0000136 # The status code is a three-digit number
137 try:
138 self.status = status = int(status)
139 if status < 100 or status > 999:
140 raise BadStatusLine(line)
141 except ValueError:
142 raise BadStatusLine(line)
Guido van Rossum34735a62000-12-15 15:09:42 +0000143 self.reason = reason.strip()
Greg Stein5e0fa402000-06-26 08:28:01 +0000144
Greg Steindd6eefb2000-07-18 09:09:48 +0000145 if version == 'HTTP/1.0':
146 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000147 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000148 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000149 elif version == 'HTTP/0.9':
150 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000151 else:
152 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000153
Jeremy Hylton110941a2000-10-12 19:58:36 +0000154 if self.version == 9:
155 self.msg = mimetools.Message(StringIO())
156 return
157
Greg Steindd6eefb2000-07-18 09:09:48 +0000158 self.msg = mimetools.Message(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000159 if self.debuglevel > 0:
160 for hdr in self.msg.headers:
161 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000162
Greg Steindd6eefb2000-07-18 09:09:48 +0000163 # don't let the msg keep an fp
164 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000165
Greg Steindd6eefb2000-07-18 09:09:48 +0000166 # are we using the chunked-style of transfer encoding?
167 tr_enc = self.msg.getheader('transfer-encoding')
168 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000169 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000170 raise UnknownTransferEncoding()
171 self.chunked = 1
172 self.chunk_left = None
173 else:
174 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000175
Greg Steindd6eefb2000-07-18 09:09:48 +0000176 # will the connection close at the end of the response?
177 conn = self.msg.getheader('connection')
178 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000179 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000180 # a "Connection: close" will always close the connection. if we
181 # don't see that and this is not HTTP/1.1, then the connection will
182 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000183 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000184 ( self.version != 11 and \
185 not self.msg.getheader('keep-alive') )
186 else:
187 # for HTTP/1.1, the connection will always remain open
188 # otherwise, it will remain open IFF we see a Keep-Alive header
189 self.will_close = self.version != 11 and \
190 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000191
Greg Steindd6eefb2000-07-18 09:09:48 +0000192 # do we have a Content-Length?
193 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
194 length = self.msg.getheader('content-length')
195 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000196 try:
197 self.length = int(length)
198 except ValueError:
199 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000200 else:
201 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000202
Greg Steindd6eefb2000-07-18 09:09:48 +0000203 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000204 if (status == 204 or # No Content
205 status == 304 or # Not Modified
206 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000207 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000208
Greg Steindd6eefb2000-07-18 09:09:48 +0000209 # if the connection remains open, and we aren't using chunked, and
210 # a content-length was not provided, then assume that the connection
211 # WILL close.
212 if not self.will_close and \
213 not self.chunked and \
214 self.length is None:
215 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000216
Greg Steindd6eefb2000-07-18 09:09:48 +0000217 def close(self):
218 if self.fp:
219 self.fp.close()
220 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000221
Greg Steindd6eefb2000-07-18 09:09:48 +0000222 def isclosed(self):
223 # NOTE: it is possible that we will not ever call self.close(). This
224 # case occurs when will_close is TRUE, length is None, and we
225 # read up to the last byte, but NOT past it.
226 #
227 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
228 # called, meaning self.isclosed() is meaningful.
229 return self.fp is None
230
231 def read(self, amt=None):
232 if self.fp is None:
233 return ''
234
235 if self.chunked:
236 chunk_left = self.chunk_left
237 value = ''
238 while 1:
239 if chunk_left is None:
240 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000241 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000242 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000243 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000244 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000245 if chunk_left == 0:
246 break
247 if amt is None:
248 value = value + self._safe_read(chunk_left)
249 elif amt < chunk_left:
250 value = value + self._safe_read(amt)
251 self.chunk_left = chunk_left - amt
252 return value
253 elif amt == chunk_left:
254 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000255 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000256 self.chunk_left = None
257 return value
258 else:
259 value = value + self._safe_read(chunk_left)
260 amt = amt - chunk_left
261
262 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000263 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000264 chunk_left = None
265
266 # read and discard trailer up to the CRLF terminator
267 ### note: we shouldn't have any trailers!
268 while 1:
269 line = self.fp.readline()
270 if line == '\r\n':
271 break
272
273 # we read everything; close the "file"
274 self.close()
275
276 return value
277
278 elif amt is None:
279 # unbounded read
280 if self.will_close:
281 s = self.fp.read()
282 else:
283 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000284 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000285 return s
286
287 if self.length is not None:
288 if amt > self.length:
289 # clip the read to the "end of response"
290 amt = self.length
291 self.length = self.length - amt
292
293 # we do not use _safe_read() here because this may be a .will_close
294 # connection, and the user is reading more bytes than will be provided
295 # (for example, reading in 1k chunks)
296 s = self.fp.read(amt)
297
Greg Steindd6eefb2000-07-18 09:09:48 +0000298 return s
299
300 def _safe_read(self, amt):
301 """Read the number of bytes requested, compensating for partial reads.
302
303 Normally, we have a blocking socket, but a read() can be interrupted
304 by a signal (resulting in a partial read).
305
306 Note that we cannot distinguish between EOF and an interrupt when zero
307 bytes have been read. IncompleteRead() will be raised in this
308 situation.
309
310 This function should be used when <amt> bytes "should" be present for
311 reading. If the bytes are truly not available (due to EOF), then the
312 IncompleteRead exception can be used to detect the problem.
313 """
314 s = ''
315 while amt > 0:
316 chunk = self.fp.read(amt)
317 if not chunk:
318 raise IncompleteRead(s)
319 s = s + chunk
320 amt = amt - len(chunk)
321 return s
322
323 def getheader(self, name, default=None):
324 if self.msg is None:
325 raise ResponseNotReady()
326 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000327
328
329class HTTPConnection:
330
Greg Steindd6eefb2000-07-18 09:09:48 +0000331 _http_vsn = 11
332 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000333
Greg Steindd6eefb2000-07-18 09:09:48 +0000334 response_class = HTTPResponse
335 default_port = HTTP_PORT
336 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000337 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000338
Greg Steindd6eefb2000-07-18 09:09:48 +0000339 def __init__(self, host, port=None):
340 self.sock = None
341 self.__response = None
342 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000343
Greg Steindd6eefb2000-07-18 09:09:48 +0000344 self._set_hostport(host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000345
Greg Steindd6eefb2000-07-18 09:09:48 +0000346 def _set_hostport(self, host, port):
347 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000348 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000349 if i >= 0:
Guido van Rossumfd97a912001-01-15 14:34:20 +0000350 port = int(host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000351 host = host[:i]
352 else:
353 port = self.default_port
354 self.host = host
355 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000356
Jeremy Hylton30f86742000-09-18 22:50:38 +0000357 def set_debuglevel(self, level):
358 self.debuglevel = level
359
Greg Steindd6eefb2000-07-18 09:09:48 +0000360 def connect(self):
361 """Connect to the host and port specified in __init__."""
Martin v. Löwis2ad25692001-07-31 08:40:21 +0000362 msg = "getaddrinfo returns an empty list"
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000363 for res in socket.getaddrinfo(self.host, self.port, 0, socket.SOCK_STREAM):
364 af, socktype, proto, canonname, sa = res
365 try:
366 self.sock = socket.socket(af, socktype, proto)
367 if self.debuglevel > 0:
368 print "connect: (%s, %s)" % (self.host, self.port)
369 self.sock.connect(sa)
370 except socket.error, msg:
371 if self.debuglevel > 0:
372 print 'connect fail:', (self.host, self.port)
Martin v. Löwis322c0d12001-10-07 08:53:32 +0000373 if self.sock:
374 self.sock.close()
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000375 self.sock = None
376 continue
377 break
378 if not self.sock:
379 raise socket.error, msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000380
Greg Steindd6eefb2000-07-18 09:09:48 +0000381 def close(self):
382 """Close the connection to the HTTP server."""
383 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000384 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000385 self.sock = None
386 if self.__response:
387 self.__response.close()
388 self.__response = None
389 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000390
Greg Steindd6eefb2000-07-18 09:09:48 +0000391 def send(self, str):
392 """Send `str' to the server."""
393 if self.sock is None:
394 if self.auto_open:
395 self.connect()
396 else:
397 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000398
Greg Steindd6eefb2000-07-18 09:09:48 +0000399 # send the data to the server. if we get a broken pipe, then close
400 # the socket. we want to reconnect when somebody tries to send again.
401 #
402 # NOTE: we DO propagate the error, though, because we cannot simply
403 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000404 if self.debuglevel > 0:
405 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000406 try:
Martin v. Löwise12454f2002-02-16 23:06:19 +0000407 self.sock.sendall(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000408 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000409 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000410 self.close()
411 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000412
Greg Steindd6eefb2000-07-18 09:09:48 +0000413 def putrequest(self, method, url):
414 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000415
Greg Steindd6eefb2000-07-18 09:09:48 +0000416 `method' specifies an HTTP request method, e.g. 'GET'.
417 `url' specifies the object being requested, e.g. '/index.html'.
418 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000419
Greg Steindd6eefb2000-07-18 09:09:48 +0000420 # check if a prior response has been completed
421 if self.__response and self.__response.isclosed():
422 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000423
Greg Steindd6eefb2000-07-18 09:09:48 +0000424 #
425 # in certain cases, we cannot issue another request on this connection.
426 # this occurs when:
427 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
428 # 2) a response to a previous request has signalled that it is going
429 # to close the connection upon completion.
430 # 3) the headers for the previous response have not been read, thus
431 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
432 #
433 # if there is no prior response, then we can request at will.
434 #
435 # if point (2) is true, then we will have passed the socket to the
436 # response (effectively meaning, "there is no prior response"), and
437 # will open a new one when a new request is made.
438 #
439 # Note: if a prior response exists, then we *can* start a new request.
440 # We are not allowed to begin fetching the response to this new
441 # request, however, until that prior response is complete.
442 #
443 if self.__state == _CS_IDLE:
444 self.__state = _CS_REQ_STARTED
445 else:
446 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000447
Greg Steindd6eefb2000-07-18 09:09:48 +0000448 if not url:
449 url = '/'
450 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000451
Greg Steindd6eefb2000-07-18 09:09:48 +0000452 try:
453 self.send(str)
454 except socket.error, v:
455 # trap 'Broken pipe' if we're allowed to automatically reconnect
456 if v[0] != 32 or not self.auto_open:
457 raise
458 # try one more time (the socket was closed; this will reopen)
459 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000460
Greg Steindd6eefb2000-07-18 09:09:48 +0000461 if self._http_vsn == 11:
462 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000463
Greg Steindd6eefb2000-07-18 09:09:48 +0000464 # this header is issued *only* for HTTP/1.1 connections. more
465 # specifically, this means it is only issued when the client uses
466 # the new HTTPConnection() class. backwards-compat clients will
467 # be using HTTP/1.0 and those clients may be issuing this header
468 # themselves. we should NOT issue it twice; some web servers (such
469 # as Apache) barf when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000470
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000471 # If we need a non-standard port,include it in the header.
472 # If the request is going through a proxy, but the host of
473 # the actual URL, not the host of the proxy.
474
475 if url.startswith('http:'):
476 nil, netloc, nil, nil, nil = urlsplit(url)
477 self.putheader('Host', netloc)
478 elif self.port == HTTP_PORT:
479 self.putheader('Host', netloc)
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000480 else:
481 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000482
Greg Steindd6eefb2000-07-18 09:09:48 +0000483 # note: we are assuming that clients will not attempt to set these
484 # headers since *this* library must deal with the
485 # consequences. this also means that when the supporting
486 # libraries are updated to recognize other forms, then this
487 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000488
Greg Steindd6eefb2000-07-18 09:09:48 +0000489 # we only want a Content-Encoding of "identity" since we don't
490 # support encodings such as x-gzip or x-deflate.
491 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000492
Greg Steindd6eefb2000-07-18 09:09:48 +0000493 # we can accept "chunked" Transfer-Encodings, but no others
494 # NOTE: no TE header implies *only* "chunked"
495 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000496
Greg Steindd6eefb2000-07-18 09:09:48 +0000497 # if TE is supplied in the header, then it must appear in a
498 # Connection header.
499 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000500
Greg Steindd6eefb2000-07-18 09:09:48 +0000501 else:
502 # For HTTP/1.0, the server will assume "not chunked"
503 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000504
Greg Steindd6eefb2000-07-18 09:09:48 +0000505 def putheader(self, header, value):
506 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000507
Greg Steindd6eefb2000-07-18 09:09:48 +0000508 For example: h.putheader('Accept', 'text/html')
509 """
510 if self.__state != _CS_REQ_STARTED:
511 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000512
Greg Steindd6eefb2000-07-18 09:09:48 +0000513 str = '%s: %s\r\n' % (header, value)
514 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000515
Greg Steindd6eefb2000-07-18 09:09:48 +0000516 def endheaders(self):
517 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000518
Greg Steindd6eefb2000-07-18 09:09:48 +0000519 if self.__state == _CS_REQ_STARTED:
520 self.__state = _CS_REQ_SENT
521 else:
522 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000523
Greg Steindd6eefb2000-07-18 09:09:48 +0000524 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000525
Greg Steindd6eefb2000-07-18 09:09:48 +0000526 def request(self, method, url, body=None, headers={}):
527 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000528
Greg Steindd6eefb2000-07-18 09:09:48 +0000529 try:
530 self._send_request(method, url, body, headers)
531 except socket.error, v:
532 # trap 'Broken pipe' if we're allowed to automatically reconnect
533 if v[0] != 32 or not self.auto_open:
534 raise
535 # try one more time
536 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000537
Greg Steindd6eefb2000-07-18 09:09:48 +0000538 def _send_request(self, method, url, body, headers):
539 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000540
Greg Steindd6eefb2000-07-18 09:09:48 +0000541 if body:
542 self.putheader('Content-Length', str(len(body)))
543 for hdr, value in headers.items():
544 self.putheader(hdr, value)
545 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000546
Greg Steindd6eefb2000-07-18 09:09:48 +0000547 if body:
548 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000549
Greg Steindd6eefb2000-07-18 09:09:48 +0000550 def getresponse(self):
551 "Get the response from the server."
Greg Stein5e0fa402000-06-26 08:28:01 +0000552
Greg Steindd6eefb2000-07-18 09:09:48 +0000553 # check if a prior response has been completed
554 if self.__response and self.__response.isclosed():
555 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000556
Greg Steindd6eefb2000-07-18 09:09:48 +0000557 #
558 # if a prior response exists, then it must be completed (otherwise, we
559 # cannot read this response's header to determine the connection-close
560 # behavior)
561 #
562 # note: if a prior response existed, but was connection-close, then the
563 # socket and response were made independent of this HTTPConnection
564 # object since a new request requires that we open a whole new
565 # connection
566 #
567 # this means the prior response had one of two states:
568 # 1) will_close: this connection was reset and the prior socket and
569 # response operate independently
570 # 2) persistent: the response was retained and we await its
571 # isclosed() status to become true.
572 #
573 if self.__state != _CS_REQ_SENT or self.__response:
574 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000575
Jeremy Hylton30f86742000-09-18 22:50:38 +0000576 if self.debuglevel > 0:
577 response = self.response_class(self.sock, self.debuglevel)
578 else:
579 response = self.response_class(self.sock)
Greg Stein5e0fa402000-06-26 08:28:01 +0000580
Greg Steindd6eefb2000-07-18 09:09:48 +0000581 response.begin()
582 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000583
Greg Steindd6eefb2000-07-18 09:09:48 +0000584 if response.will_close:
585 # this effectively passes the connection to the response
586 self.close()
587 else:
588 # remember this, so we can tell when it is complete
589 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000590
Greg Steindd6eefb2000-07-18 09:09:48 +0000591 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000592
593
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000594class FakeSocket:
Greg Steindd6eefb2000-07-18 09:09:48 +0000595 def __init__(self, sock, ssl):
596 self.__sock = sock
597 self.__ssl = ssl
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000598
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000599 def makefile(self, mode, bufsize=None):
600 """Return a readable file-like object with data from socket.
601
602 This method offers only partial support for the makefile
603 interface of a real socket. It only supports modes 'r' and
604 'rb' and the bufsize argument is ignored.
605
Tim Peters5ceadc82001-01-13 19:16:21 +0000606 The returned object contains *all* of the file data
Jeremy Hylton4d746fc2000-08-23 20:34:17 +0000607 """
Greg Steindd6eefb2000-07-18 09:09:48 +0000608 if mode != 'r' and mode != 'rb':
609 raise UnimplementedFileMode()
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000610
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000611 msgbuf = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000612 while 1:
613 try:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000614 buf = self.__ssl.read()
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000615 except socket.sslerror, err:
616 if (err[0] == socket.SSL_ERROR_WANT_READ
Neal Norwitz22c5d772002-02-11 17:59:51 +0000617 or err[0] == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000618 continue
619 if err[0] == socket.SSL_ERROR_ZERO_RETURN:
620 break
621 raise
622 except socket.error, err:
Tim Petersf3623f32001-10-11 18:15:51 +0000623 if err[0] == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000624 continue
625 raise
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000626 if buf == '':
627 break
628 msgbuf.append(buf)
629 return StringIO("".join(msgbuf))
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000630
Greg Steindd6eefb2000-07-18 09:09:48 +0000631 def send(self, stuff, flags = 0):
632 return self.__ssl.write(stuff)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000633
Greg Steindd6eefb2000-07-18 09:09:48 +0000634 def recv(self, len = 1024, flags = 0):
635 return self.__ssl.read(len)
Guido van Rossum23acc951994-02-21 16:36:04 +0000636
Greg Steindd6eefb2000-07-18 09:09:48 +0000637 def __getattr__(self, attr):
638 return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000639
Guido van Rossum23acc951994-02-21 16:36:04 +0000640
class HTTPSConnection(HTTPConnection):
    """HTTPConnection variant that talks to the server over SSL."""

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Accept only the optional keywords `key_file' and `cert_file'.

        Any other keyword raises IllegalKeywordArgument.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        """Open a TCP connection to host:port and start SSL on it."""
        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # hand socket.ssl() the object stored in `_sock' when the socket
        # has such an attribute, otherwise the socket itself
        if hasattr(plain, "_sock"):
            lowlevel = plain._sock
        else:
            lowlevel = plain
        secure = socket.ssl(lowlevel, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
Greg Stein5e0fa402000-06-26 08:28:01 +0000672
673
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000674class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000675 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000676
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 _http_vsn = 10
678 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
Greg Steindd6eefb2000-07-18 09:09:48 +0000680 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000681
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000682 _connection_class = HTTPConnection
683
Greg Stein81937a42001-08-18 09:20:23 +0000684 def __init__(self, host='', port=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000685 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000686
Greg Steindd6eefb2000-07-18 09:09:48 +0000687 # some joker passed 0 explicitly, meaning default port
688 if port == 0:
689 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000690
Greg Steindd6eefb2000-07-18 09:09:48 +0000691 # Note that we may pass an empty string as the host; this will throw
692 # an error when we attempt to connect. Presumably, the client code
693 # will call connect before then, with a proper host.
Greg Stein81937a42001-08-18 09:20:23 +0000694 self._setup(self._connection_class(host, port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000695
Greg Stein81937a42001-08-18 09:20:23 +0000696 def _setup(self, conn):
697 self._conn = conn
698
699 # set up delegation to flesh out interface
700 self.send = conn.send
701 self.putrequest = conn.putrequest
702 self.endheaders = conn.endheaders
703 self.set_debuglevel = conn.set_debuglevel
704
705 conn._http_vsn = self._http_vsn
706 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000707
Greg Steindd6eefb2000-07-18 09:09:48 +0000708 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000709
Greg Steindd6eefb2000-07-18 09:09:48 +0000710 def connect(self, host=None, port=None):
711 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000712
Greg Steindd6eefb2000-07-18 09:09:48 +0000713 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000714 self._conn._set_hostport(host, port)
715 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000716
Greg Steindd6eefb2000-07-18 09:09:48 +0000717 def getfile(self):
718 "Provide a getfile, since the superclass' does not use this concept."
719 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000720
Greg Steindd6eefb2000-07-18 09:09:48 +0000721 def putheader(self, header, *values):
722 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000723 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000724
Greg Steindd6eefb2000-07-18 09:09:48 +0000725 def getreply(self):
726 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000727
Greg Steindd6eefb2000-07-18 09:09:48 +0000728 Returns a tuple consisting of:
729 - server status code (e.g. '200' if all goes well)
730 - server "reason" corresponding to status code
731 - any RFC822 headers in the response from the server
732 """
733 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000734 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000735 except BadStatusLine, e:
736 ### hmm. if getresponse() ever closes the socket on a bad request,
737 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000738
Greg Steindd6eefb2000-07-18 09:09:48 +0000739 ### should we keep this behavior? do people use it?
740 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000741 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000742
Greg Steindd6eefb2000-07-18 09:09:48 +0000743 # close our socket -- we want to restart after any protocol error
744 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000745
Greg Steindd6eefb2000-07-18 09:09:48 +0000746 self.headers = None
747 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000748
Greg Steindd6eefb2000-07-18 09:09:48 +0000749 self.headers = response.msg
750 self.file = response.fp
751 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000752
Greg Steindd6eefb2000-07-18 09:09:48 +0000753 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000754 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000755
Greg Steindd6eefb2000-07-18 09:09:48 +0000756 # note that self.file == response.fp, which gets closed by the
757 # superclass. just clear the object ref here.
758 ### hmm. messy. if status==-1, then self.file is owned by us.
759 ### well... we aren't explicitly closing, but losing this ref will
760 ### do it
761 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000762
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            """Create the wrapped connection, passing along the X509 info.

            host defaults to ''; a port of 0 is treated like None.  The
            keyword arguments are forwarded untouched to the connection
            class.
            """

            # urf. compensate for bad input.
            if port == 0:
                port = None

            conn = self._connection_class(host, port, **x509)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = x509.get('key_file')
            self.cert_file = x509.get('cert_file')
786
Greg Stein5e0fa402000-06-26 08:28:01 +0000787
class HTTPException(Exception):
    """Base class of the exception classes defined below."""

class NotConnected(HTTPException):
    """HTTPException subclass; carries no extra data."""

class UnknownProtocol(HTTPException):
    """HTTPException subclass that records a protocol version.

    The value passed in is available as the 'version' attribute and as
    the exception's only argument.
    """
    def __init__(self, version):
        # Pass the value to the base class as well, so that e.args and
        # str(e) report it instead of being empty.
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; carries no extra data."""

class IllegalKeywordArgument(HTTPException):
    """HTTPException subclass; carries no extra data."""

class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; carries no extra data."""

class IncompleteRead(HTTPException):
    """HTTPException subclass that records partially read data.

    The value passed in is available as the 'partial' attribute and as
    the exception's only argument.
    """
    def __init__(self, partial):
        # Populate e.args too; see UnknownProtocol.
        HTTPException.__init__(self, partial)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    """Base class for the three connection-state exceptions that follow."""

class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra data."""

class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra data."""

class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra data."""

class BadStatusLine(HTTPException):
    """HTTPException subclass that records an offending status line.

    The value passed in is available as the 'line' attribute and as the
    exception's only argument.
    """
    def __init__(self, line):
        # Populate e.args too; see UnknownProtocol.
        HTTPException.__init__(self, line)
        self.line = line

# for backwards compatibility
error = HTTPException
829
830
831#
832# snarfed from httplib.py for now...
833#
Guido van Rossum23acc951994-02-21 16:36:04 +0000834def test():
Guido van Rossum41999c11997-12-09 00:12:23 +0000835 """Test this module.
836
837 The test consists of retrieving and displaying the Python
838 home page, along with the error code and error string returned
839 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +0000840 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000841
Guido van Rossum41999c11997-12-09 00:12:23 +0000842 import sys
843 import getopt
844 opts, args = getopt.getopt(sys.argv[1:], 'd')
845 dl = 0
846 for o, a in opts:
847 if o == '-d': dl = dl + 1
848 host = 'www.python.org'
849 selector = '/'
850 if args[0:]: host = args[0]
851 if args[1:]: selector = args[1]
852 h = HTTP()
853 h.set_debuglevel(dl)
854 h.connect(host)
855 h.putrequest('GET', selector)
856 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000857 status, reason, headers = h.getreply()
858 print 'status =', status
859 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +0000860 print
861 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000862 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +0000863 print
864 print h.getfile().read()
Greg Stein5e0fa402000-06-26 08:28:01 +0000865
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000866 # minimal test that code to extract host from url works
867 class HTTP11(HTTP):
868 _http_vsn = 11
869 _http_vsn_str = 'HTTP/1.1'
870
871 h = HTTP11('www.python.org')
872 h.putrequest('GET', 'http://www.python.org/~jeremy/')
873 h.endheaders()
874 h.getreply()
875 h.close()
876
Greg Stein5e0fa402000-06-26 08:28:01 +0000877 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000878 host = 'sourceforge.net'
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000879 selector = '/projects/python'
Greg Steindd6eefb2000-07-18 09:09:48 +0000880 hs = HTTPS()
881 hs.connect(host)
882 hs.putrequest('GET', selector)
883 hs.endheaders()
884 status, reason, headers = hs.getreply()
885 print 'status =', status
886 print 'reason =', reason
887 print
888 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +0000889 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000890 print
891 print hs.getfile().read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000892
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +0000893
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()