blob: 8238f1ae0270d1e2b34f4828a85165f840df3179 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000073
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000074try:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000076except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000077 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000078
# Public names exported by "from httplib import *".
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPSConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
]

# port numbers for plain HTTP and for HTTPS
HTTP_PORT = 80
HTTPS_PORT = 443

# initial value of the HTTPResponse attributes that are only known once
# the status line and headers have been parsed
_UNKNOWN = 'UNKNOWN'

# connection states (see the table at the end of the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
95
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +000096class HTTPMessage(mimetools.Message):
97
98 def addheader(self, key, value):
99 """Add header for field key handling repeats."""
100 prev = self.dict.get(key)
101 if prev is None:
102 self.dict[key] = value
103 else:
104 combined = ", ".join((prev, value))
105 self.dict[key] = combined
106
107 def addcontinue(self, key, more):
108 """Add more field data from a continuation line."""
109 prev = self.dict[key]
110 self.dict[key] = prev + "\n " + more
111
112 def readheaders(self):
113 """Read header lines.
114
115 Read header lines up to the entirely blank line that terminates them.
116 The (normally blank) line that ends the headers is skipped, but not
117 included in the returned list. If a non-header line ends the headers,
118 (which is an error), an attempt is made to backspace over it; it is
119 never included in the returned list.
120
121 The variable self.status is set to the empty string if all went well,
122 otherwise it is an error message. The variable self.headers is a
123 completely uninterpreted list of lines contained in the header (so
124 printing them will reproduce the header exactly as it appears in the
125 file).
126
127 If multiple header fields with the same name occur, they are combined
128 according to the rules in RFC 2616 sec 4.2:
129
130 Appending each subsequent field-value to the first, each separated
131 by a comma. The order in which header fields with the same field-name
132 are received is significant to the interpretation of the combined
133 field value.
134 """
135 # XXX The implementation overrides the readheaders() method of
136 # rfc822.Message. The base class design isn't amenable to
137 # customized behavior here so the method here is a copy of the
138 # base class code with a few small changes.
139
140 self.dict = {}
141 self.unixfrom = ''
142 self.headers = list = []
143 self.status = ''
144 headerseen = ""
145 firstline = 1
146 startofline = unread = tell = None
147 if hasattr(self.fp, 'unread'):
148 unread = self.fp.unread
149 elif self.seekable:
150 tell = self.fp.tell
151 while 1:
152 if tell:
153 try:
154 startofline = tell()
155 except IOError:
156 startofline = tell = None
157 self.seekable = 0
158 line = self.fp.readline()
159 if not line:
160 self.status = 'EOF in headers'
161 break
162 # Skip unix From name time lines
163 if firstline and line.startswith('From '):
164 self.unixfrom = self.unixfrom + line
165 continue
166 firstline = 0
167 if headerseen and line[0] in ' \t':
168 # XXX Not sure if continuation lines are handled properly
169 # for http and/or for repeating headers
170 # It's a continuation line.
171 list.append(line)
172 x = self.dict[headerseen] + "\n " + line.strip()
173 self.addcontinue(headerseen, line.strip())
174 continue
175 elif self.iscomment(line):
176 # It's a comment. Ignore it.
177 continue
178 elif self.islast(line):
179 # Note! No pushback here! The delimiter line gets eaten.
180 break
181 headerseen = self.isheader(line)
182 if headerseen:
183 # It's a legal header line, save it.
184 list.append(line)
185 self.addheader(headerseen, line[len(headerseen)+1:].strip())
186 continue
187 else:
188 # It's not a header line; throw it back and stop here.
189 if not self.dict:
190 self.status = 'No headers'
191 else:
192 self.status = 'Non-header line where header expected'
193 # Try to undo the read.
194 if unread:
195 unread(line)
196 elif tell:
197 self.fp.seek(startofline)
198 else:
199 self.status = self.status + '; bad seek'
200 break
201
Greg Stein5e0fa402000-06-26 08:28:01 +0000202
203class HTTPResponse:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000204
205 # strict: If true, raise BadStatusLine if the status line can't be
206 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
207 # false because it prvents clients from talking to HTTP/0.9
208 # servers. Note that a response with a sufficiently corrupted
209 # status line will look like an HTTP/0.9 response.
210
211 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
212
213 def __init__(self, sock, debuglevel=0, strict=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000214 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000215 self.debuglevel = debuglevel
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000216 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000217
Greg Steindd6eefb2000-07-18 09:09:48 +0000218 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000219
Greg Steindd6eefb2000-07-18 09:09:48 +0000220 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000221 self.version = _UNKNOWN # HTTP-Version
222 self.status = _UNKNOWN # Status-Code
223 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000224
Tim Peters07e99cb2001-01-14 23:47:14 +0000225 self.chunked = _UNKNOWN # is "chunked" being used?
226 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
227 self.length = _UNKNOWN # number of bytes left in response
228 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000229
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000230 def _read_status(self):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000231 # Initialize with Simple-Response defaults
Greg Stein5e0fa402000-06-26 08:28:01 +0000232 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000233 if self.debuglevel > 0:
234 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000235 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000236 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000237 except ValueError:
238 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000239 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000240 reason = ""
241 except ValueError:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000242 # empty version will cause next test to fail and status
243 # will be treated as 0.9 response.
244 version = ""
245 if not version.startswith('HTTP/'):
246 if self.strict:
247 self.close()
248 raise BadStatusLine(line)
249 else:
250 # assume it's a Simple-Response from an 0.9 server
251 self.fp = LineAndFileWrapper(line, self.fp)
252 return "HTTP/0.9", 200, ""
Greg Stein5e0fa402000-06-26 08:28:01 +0000253
Jeremy Hylton23d40472001-04-13 14:57:08 +0000254 # The status code is a three-digit number
255 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000256 status = int(status)
Jeremy Hylton23d40472001-04-13 14:57:08 +0000257 if status < 100 or status > 999:
258 raise BadStatusLine(line)
259 except ValueError:
260 raise BadStatusLine(line)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000261 return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000262
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000263 def _begin(self):
264 if self.msg is not None:
265 # we've already started reading the response
266 return
267
268 # read until we get a non-100 response
269 while 1:
270 version, status, reason = self._read_status()
271 if status != 100:
272 break
273 # skip the header from the 100 response
274 while 1:
275 skip = self.fp.readline().strip()
276 if not skip:
277 break
278 if self.debuglevel > 0:
279 print "header:", skip
280
281 self.status = status
282 self.reason = reason.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000283 if version == 'HTTP/1.0':
284 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000285 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000286 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000287 elif version == 'HTTP/0.9':
288 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000289 else:
290 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000291
Jeremy Hylton110941a2000-10-12 19:58:36 +0000292 if self.version == 9:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000293 self.chunked = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000294 self.will_close = 1
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000295 self.msg = HTTPMessage(StringIO())
Jeremy Hylton110941a2000-10-12 19:58:36 +0000296 return
297
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000298 self.msg = HTTPMessage(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000299 if self.debuglevel > 0:
300 for hdr in self.msg.headers:
301 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000302
Greg Steindd6eefb2000-07-18 09:09:48 +0000303 # don't let the msg keep an fp
304 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000305
Greg Steindd6eefb2000-07-18 09:09:48 +0000306 # are we using the chunked-style of transfer encoding?
307 tr_enc = self.msg.getheader('transfer-encoding')
308 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000309 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000310 raise UnknownTransferEncoding()
311 self.chunked = 1
312 self.chunk_left = None
313 else:
314 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000315
Greg Steindd6eefb2000-07-18 09:09:48 +0000316 # will the connection close at the end of the response?
317 conn = self.msg.getheader('connection')
318 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000319 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000320 # a "Connection: close" will always close the connection. if we
321 # don't see that and this is not HTTP/1.1, then the connection will
322 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000323 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000324 ( self.version != 11 and \
325 not self.msg.getheader('keep-alive') )
326 else:
327 # for HTTP/1.1, the connection will always remain open
328 # otherwise, it will remain open IFF we see a Keep-Alive header
329 self.will_close = self.version != 11 and \
330 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000331
Greg Steindd6eefb2000-07-18 09:09:48 +0000332 # do we have a Content-Length?
333 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
334 length = self.msg.getheader('content-length')
335 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000336 try:
337 self.length = int(length)
338 except ValueError:
339 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000340 else:
341 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000342
Greg Steindd6eefb2000-07-18 09:09:48 +0000343 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000344 if (status == 204 or # No Content
345 status == 304 or # Not Modified
346 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000347 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000348
Greg Steindd6eefb2000-07-18 09:09:48 +0000349 # if the connection remains open, and we aren't using chunked, and
350 # a content-length was not provided, then assume that the connection
351 # WILL close.
352 if not self.will_close and \
353 not self.chunked and \
354 self.length is None:
355 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000356
Greg Steindd6eefb2000-07-18 09:09:48 +0000357 def close(self):
358 if self.fp:
359 self.fp.close()
360 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000361
Greg Steindd6eefb2000-07-18 09:09:48 +0000362 def isclosed(self):
363 # NOTE: it is possible that we will not ever call self.close(). This
364 # case occurs when will_close is TRUE, length is None, and we
365 # read up to the last byte, but NOT past it.
366 #
367 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
368 # called, meaning self.isclosed() is meaningful.
369 return self.fp is None
370
371 def read(self, amt=None):
372 if self.fp is None:
373 return ''
374
375 if self.chunked:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000376 assert self.chunked != _UNKNOWN
Greg Steindd6eefb2000-07-18 09:09:48 +0000377 chunk_left = self.chunk_left
378 value = ''
379 while 1:
380 if chunk_left is None:
381 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000382 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000383 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000384 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000385 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000386 if chunk_left == 0:
387 break
388 if amt is None:
389 value = value + self._safe_read(chunk_left)
390 elif amt < chunk_left:
391 value = value + self._safe_read(amt)
392 self.chunk_left = chunk_left - amt
393 return value
394 elif amt == chunk_left:
395 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000396 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000397 self.chunk_left = None
398 return value
399 else:
400 value = value + self._safe_read(chunk_left)
401 amt = amt - chunk_left
402
403 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000404 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000405 chunk_left = None
406
407 # read and discard trailer up to the CRLF terminator
408 ### note: we shouldn't have any trailers!
409 while 1:
410 line = self.fp.readline()
411 if line == '\r\n':
412 break
413
414 # we read everything; close the "file"
415 self.close()
416
417 return value
418
419 elif amt is None:
420 # unbounded read
421 if self.will_close:
422 s = self.fp.read()
423 else:
424 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000425 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000426 return s
427
428 if self.length is not None:
429 if amt > self.length:
430 # clip the read to the "end of response"
431 amt = self.length
432 self.length = self.length - amt
433
434 # we do not use _safe_read() here because this may be a .will_close
435 # connection, and the user is reading more bytes than will be provided
436 # (for example, reading in 1k chunks)
437 s = self.fp.read(amt)
438
Greg Steindd6eefb2000-07-18 09:09:48 +0000439 return s
440
441 def _safe_read(self, amt):
442 """Read the number of bytes requested, compensating for partial reads.
443
444 Normally, we have a blocking socket, but a read() can be interrupted
445 by a signal (resulting in a partial read).
446
447 Note that we cannot distinguish between EOF and an interrupt when zero
448 bytes have been read. IncompleteRead() will be raised in this
449 situation.
450
451 This function should be used when <amt> bytes "should" be present for
452 reading. If the bytes are truly not available (due to EOF), then the
453 IncompleteRead exception can be used to detect the problem.
454 """
455 s = ''
456 while amt > 0:
457 chunk = self.fp.read(amt)
458 if not chunk:
459 raise IncompleteRead(s)
460 s = s + chunk
461 amt = amt - len(chunk)
462 return s
463
464 def getheader(self, name, default=None):
465 if self.msg is None:
466 raise ResponseNotReady()
467 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000468
469
470class HTTPConnection:
471
Greg Steindd6eefb2000-07-18 09:09:48 +0000472 _http_vsn = 11
473 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000474
Greg Steindd6eefb2000-07-18 09:09:48 +0000475 response_class = HTTPResponse
476 default_port = HTTP_PORT
477 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000478 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000479 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000480
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000481 def __init__(self, host, port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000482 self.sock = None
483 self.__response = None
484 self.__state = _CS_IDLE
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000485
Greg Steindd6eefb2000-07-18 09:09:48 +0000486 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000487 if strict is not None:
488 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000489
Greg Steindd6eefb2000-07-18 09:09:48 +0000490 def _set_hostport(self, host, port):
491 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000492 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000493 if i >= 0:
Skip Montanaro9d389972002-03-24 16:53:50 +0000494 try:
495 port = int(host[i+1:])
496 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000497 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000498 host = host[:i]
499 else:
500 port = self.default_port
501 self.host = host
502 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000503
Jeremy Hylton30f86742000-09-18 22:50:38 +0000504 def set_debuglevel(self, level):
505 self.debuglevel = level
506
Greg Steindd6eefb2000-07-18 09:09:48 +0000507 def connect(self):
508 """Connect to the host and port specified in __init__."""
Martin v. Löwis2ad25692001-07-31 08:40:21 +0000509 msg = "getaddrinfo returns an empty list"
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000510 for res in socket.getaddrinfo(self.host, self.port, 0,
511 socket.SOCK_STREAM):
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000512 af, socktype, proto, canonname, sa = res
513 try:
514 self.sock = socket.socket(af, socktype, proto)
515 if self.debuglevel > 0:
516 print "connect: (%s, %s)" % (self.host, self.port)
517 self.sock.connect(sa)
518 except socket.error, msg:
519 if self.debuglevel > 0:
520 print 'connect fail:', (self.host, self.port)
Martin v. Löwis322c0d12001-10-07 08:53:32 +0000521 if self.sock:
522 self.sock.close()
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000523 self.sock = None
524 continue
525 break
526 if not self.sock:
527 raise socket.error, msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000528
Greg Steindd6eefb2000-07-18 09:09:48 +0000529 def close(self):
530 """Close the connection to the HTTP server."""
531 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000532 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000533 self.sock = None
534 if self.__response:
535 self.__response.close()
536 self.__response = None
537 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000538
Greg Steindd6eefb2000-07-18 09:09:48 +0000539 def send(self, str):
540 """Send `str' to the server."""
541 if self.sock is None:
542 if self.auto_open:
543 self.connect()
544 else:
545 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000546
Greg Steindd6eefb2000-07-18 09:09:48 +0000547 # send the data to the server. if we get a broken pipe, then close
548 # the socket. we want to reconnect when somebody tries to send again.
549 #
550 # NOTE: we DO propagate the error, though, because we cannot simply
551 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000552 if self.debuglevel > 0:
553 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000554 try:
Martin v. Löwise12454f2002-02-16 23:06:19 +0000555 self.sock.sendall(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000556 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000557 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000558 self.close()
559 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000560
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000561 def putrequest(self, method, url, skip_host=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000562 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000563
Greg Steindd6eefb2000-07-18 09:09:48 +0000564 `method' specifies an HTTP request method, e.g. 'GET'.
565 `url' specifies the object being requested, e.g. '/index.html'.
566 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000567
Greg Steindd6eefb2000-07-18 09:09:48 +0000568 # check if a prior response has been completed
569 if self.__response and self.__response.isclosed():
570 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000571
Greg Steindd6eefb2000-07-18 09:09:48 +0000572 #
573 # in certain cases, we cannot issue another request on this connection.
574 # this occurs when:
575 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
576 # 2) a response to a previous request has signalled that it is going
577 # to close the connection upon completion.
578 # 3) the headers for the previous response have not been read, thus
579 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
580 #
581 # if there is no prior response, then we can request at will.
582 #
583 # if point (2) is true, then we will have passed the socket to the
584 # response (effectively meaning, "there is no prior response"), and
585 # will open a new one when a new request is made.
586 #
587 # Note: if a prior response exists, then we *can* start a new request.
588 # We are not allowed to begin fetching the response to this new
589 # request, however, until that prior response is complete.
590 #
591 if self.__state == _CS_IDLE:
592 self.__state = _CS_REQ_STARTED
593 else:
594 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000595
Greg Steindd6eefb2000-07-18 09:09:48 +0000596 if not url:
597 url = '/'
598 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000599
Greg Steindd6eefb2000-07-18 09:09:48 +0000600 try:
601 self.send(str)
602 except socket.error, v:
603 # trap 'Broken pipe' if we're allowed to automatically reconnect
604 if v[0] != 32 or not self.auto_open:
605 raise
606 # try one more time (the socket was closed; this will reopen)
607 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000608
Greg Steindd6eefb2000-07-18 09:09:48 +0000609 if self._http_vsn == 11:
610 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000611
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000612 if not skip_host:
613 # this header is issued *only* for HTTP/1.1
614 # connections. more specifically, this means it is
615 # only issued when the client uses the new
616 # HTTPConnection() class. backwards-compat clients
617 # will be using HTTP/1.0 and those clients may be
618 # issuing this header themselves. we should NOT issue
619 # it twice; some web servers (such as Apache) barf
620 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000621
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000622 # If we need a non-standard port,include it in the
623 # header. If the request is going through a proxy,
624 # but the host of the actual URL, not the host of the
625 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000626
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000627 netloc = ''
628 if url.startswith('http'):
629 nil, netloc, nil, nil, nil = urlsplit(url)
630
631 if netloc:
632 self.putheader('Host', netloc)
633 elif self.port == HTTP_PORT:
634 self.putheader('Host', self.host)
635 else:
636 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000637
Greg Steindd6eefb2000-07-18 09:09:48 +0000638 # note: we are assuming that clients will not attempt to set these
639 # headers since *this* library must deal with the
640 # consequences. this also means that when the supporting
641 # libraries are updated to recognize other forms, then this
642 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000643
Greg Steindd6eefb2000-07-18 09:09:48 +0000644 # we only want a Content-Encoding of "identity" since we don't
645 # support encodings such as x-gzip or x-deflate.
646 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000647
Greg Steindd6eefb2000-07-18 09:09:48 +0000648 # we can accept "chunked" Transfer-Encodings, but no others
649 # NOTE: no TE header implies *only* "chunked"
650 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000651
Greg Steindd6eefb2000-07-18 09:09:48 +0000652 # if TE is supplied in the header, then it must appear in a
653 # Connection header.
654 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
Greg Steindd6eefb2000-07-18 09:09:48 +0000656 else:
657 # For HTTP/1.0, the server will assume "not chunked"
658 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000659
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 def putheader(self, header, value):
661 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 For example: h.putheader('Accept', 'text/html')
664 """
665 if self.__state != _CS_REQ_STARTED:
666 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000667
Greg Steindd6eefb2000-07-18 09:09:48 +0000668 str = '%s: %s\r\n' % (header, value)
669 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000670
Greg Steindd6eefb2000-07-18 09:09:48 +0000671 def endheaders(self):
672 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 if self.__state == _CS_REQ_STARTED:
675 self.__state = _CS_REQ_SENT
676 else:
677 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000678
Greg Steindd6eefb2000-07-18 09:09:48 +0000679 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000680
Greg Steindd6eefb2000-07-18 09:09:48 +0000681 def request(self, method, url, body=None, headers={}):
682 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000683
Greg Steindd6eefb2000-07-18 09:09:48 +0000684 try:
685 self._send_request(method, url, body, headers)
686 except socket.error, v:
687 # trap 'Broken pipe' if we're allowed to automatically reconnect
688 if v[0] != 32 or not self.auto_open:
689 raise
690 # try one more time
691 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000692
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 def _send_request(self, method, url, body, headers):
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000694 # If headers already contains a host header, then define the
695 # optional skip_host argument to putrequest(). The check is
696 # harder because field names are case insensitive.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000697 if 'Host' in (headers
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000698 or [k for k in headers.iterkeys() if k.lower() == "host"]):
699 self.putrequest(method, url, skip_host=1)
700 else:
701 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000702
Greg Steindd6eefb2000-07-18 09:09:48 +0000703 if body:
704 self.putheader('Content-Length', str(len(body)))
705 for hdr, value in headers.items():
706 self.putheader(hdr, value)
707 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000708
Greg Steindd6eefb2000-07-18 09:09:48 +0000709 if body:
710 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000711
def getresponse(self):
    "Get the response from the server."

    # A retained response that has since been read to completion no
    # longer occupies the connection, so drop our reference to it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # A response may only be fetched once a request has been fully
    # sent, and only if no earlier response is still pending on this
    # connection.  An earlier response is in one of two situations:
    #   1) will_close: the connection was reset when that response was
    #      created, so the old socket and response live on
    #      independently of this object and nothing was retained;
    #   2) persistent: the response was retained in self.__response and
    #      we wait for its isclosed() to turn true (handled above).
    # Without that rule we could not read this response's headers to
    # learn its own connection-close behaviour.
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    # The debug level is only handed to the response class when
    # debugging is actually switched on.
    if self.debuglevel > 0:
        positional = (self.sock, self.debuglevel)
    else:
        positional = (self.sock,)
    response = self.response_class(strict=self.strict, *positional)

    response._begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if not response.will_close:
        # persistent connection: remember the response so that we can
        # tell when it has been completely read
        self.__response = response
    else:
        # closing here effectively passes the connection to the response
        self.close()

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000756
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000757class SSLFile:
758 """File-like object wrapping an SSL socket."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000759
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000760 BUFSIZE = 8192
761
762 def __init__(self, sock, ssl, bufsize=None):
763 self._sock = sock
764 self._ssl = ssl
765 self._buf = ''
766 self._bufsize = bufsize or self.__class__.BUFSIZE
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000767
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000768 def _read(self):
769 buf = ''
770 # put in a loop so that we retry on transient errors
Greg Steindd6eefb2000-07-18 09:09:48 +0000771 while 1:
772 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000773 buf = self._ssl.read(self._bufsize)
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000774 except socket.sslerror, err:
775 if (err[0] == socket.SSL_ERROR_WANT_READ
Neal Norwitz22c5d772002-02-11 17:59:51 +0000776 or err[0] == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000777 continue
Martin v. Löwis6af3e2d2002-04-20 07:47:40 +0000778 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
779 or err[0] == socket.SSL_ERROR_EOF):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000780 break
781 raise
782 except socket.error, err:
Tim Petersf3623f32001-10-11 18:15:51 +0000783 if err[0] == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000784 continue
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000785 if err[0] == errno.EBADF:
786 # XXX socket was closed?
787 break
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000788 raise
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000789 else:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000790 break
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000791 return buf
792
793 def read(self, size=None):
794 L = [self._buf]
795 avail = len(self._buf)
796 while size is None or avail < size:
797 s = self._read()
798 if s == '':
799 break
800 L.append(s)
801 avail += len(s)
802 all = "".join(L)
803 if size is None:
804 self._buf = ''
805 return all
806 else:
807 self._buf = all[size:]
808 return all[:size]
809
810 def readline(self):
811 L = [self._buf]
812 self._buf = ''
813 while 1:
814 i = L[-1].find("\n")
815 if i >= 0:
816 break
817 s = self._read()
818 if s == '':
819 break
820 L.append(s)
821 if i == -1:
822 # loop exited because there is no more data
823 return "".join(L)
824 else:
825 all = "".join(L)
826 # XXX could do enough bookkeeping not to do a 2nd search
827 i = all.find("\n") + 1
828 line = all[:i]
829 self._buf = all[i:]
830 return line
831
832 def close(self):
833 self._sock.close()
834
class FakeSocket:
    """Socket look-alike that moves its data through an SSL object.

    send(), sendall() and recv() use the SSL object; any attribute not
    defined here is looked up on the wrapped socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        "Return an SSLFile for reading; other modes are not implemented."
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()
        return SSLFile(self.__sock, self.__ssl, bufsize)

    def send(self, stuff, flags = 0):
        "Write stuff through the SSL object; flags is accepted but unused."
        return self.__ssl.write(stuff)

    def sendall(self, stuff, flags = 0):
        "Same as send(): a single write through the SSL object."
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        "Read up to len bytes from the SSL object; flags is unused."
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # everything else is delegated to the underlying socket
        return getattr(self.__sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000856
Guido van Rossum23acc951994-02-21 16:36:04 +0000857
class HTTPSConnection(HTTPConnection):
    """HTTPConnection variant that talks to the server over SSL.

    key_file and cert_file are stored on the instance and handed to
    socket.ssl() when the connection is opened.
    """

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        HTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # socket.ssl() is given the object found in the "_sock"
        # attribute when the socket has one, else the socket itself.
        if hasattr(plain, "_sock"):
            raw = plain._sock
        else:
            raw = plain
        secure = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
Greg Stein5e0fa402000-06-26 08:28:01 +0000879
880
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000881class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000882 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000883
Greg Steindd6eefb2000-07-18 09:09:48 +0000884 _http_vsn = 10
885 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000886
Greg Steindd6eefb2000-07-18 09:09:48 +0000887 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000888
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000889 _connection_class = HTTPConnection
890
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000891 def __init__(self, host='', port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000892 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000893
Greg Steindd6eefb2000-07-18 09:09:48 +0000894 # some joker passed 0 explicitly, meaning default port
895 if port == 0:
896 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000897
Greg Steindd6eefb2000-07-18 09:09:48 +0000898 # Note that we may pass an empty string as the host; this will throw
899 # an error when we attempt to connect. Presumably, the client code
900 # will call connect before then, with a proper host.
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000901 self._setup(self._connection_class(host, port, strict))
Greg Stein5e0fa402000-06-26 08:28:01 +0000902
Greg Stein81937a42001-08-18 09:20:23 +0000903 def _setup(self, conn):
904 self._conn = conn
905
906 # set up delegation to flesh out interface
907 self.send = conn.send
908 self.putrequest = conn.putrequest
909 self.endheaders = conn.endheaders
910 self.set_debuglevel = conn.set_debuglevel
911
912 conn._http_vsn = self._http_vsn
913 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000914
Greg Steindd6eefb2000-07-18 09:09:48 +0000915 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000916
Greg Steindd6eefb2000-07-18 09:09:48 +0000917 def connect(self, host=None, port=None):
918 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000919
Greg Steindd6eefb2000-07-18 09:09:48 +0000920 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000921 self._conn._set_hostport(host, port)
922 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000923
Greg Steindd6eefb2000-07-18 09:09:48 +0000924 def getfile(self):
925 "Provide a getfile, since the superclass' does not use this concept."
926 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000927
Greg Steindd6eefb2000-07-18 09:09:48 +0000928 def putheader(self, header, *values):
929 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000930 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000931
Greg Steindd6eefb2000-07-18 09:09:48 +0000932 def getreply(self):
933 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000934
Greg Steindd6eefb2000-07-18 09:09:48 +0000935 Returns a tuple consisting of:
936 - server status code (e.g. '200' if all goes well)
937 - server "reason" corresponding to status code
938 - any RFC822 headers in the response from the server
939 """
940 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000941 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000942 except BadStatusLine, e:
943 ### hmm. if getresponse() ever closes the socket on a bad request,
944 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000945
Greg Steindd6eefb2000-07-18 09:09:48 +0000946 ### should we keep this behavior? do people use it?
947 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000948 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +0000949
Greg Steindd6eefb2000-07-18 09:09:48 +0000950 # close our socket -- we want to restart after any protocol error
951 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000952
Greg Steindd6eefb2000-07-18 09:09:48 +0000953 self.headers = None
954 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +0000955
Greg Steindd6eefb2000-07-18 09:09:48 +0000956 self.headers = response.msg
957 self.file = response.fp
958 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000959
Greg Steindd6eefb2000-07-18 09:09:48 +0000960 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000961 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000962
Greg Steindd6eefb2000-07-18 09:09:48 +0000963 # note that self.file == response.fp, which gets closed by the
964 # superclass. just clear the object ref here.
965 ### hmm. messy. if status==-1, then self.file is owned by us.
966 ### well... we aren't explicitly closing, but losing this ref will
967 ### do it
968 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000969
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            "Provide a default host and pass the X509 cert info along."

            # urf. compensate for bad input.
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein81937a42001-08-18 09:20:23 +0000995
Greg Stein5e0fa402000-06-26 08:28:01 +0000996
class HTTPException(Exception):
    """Root of every exception defined by this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """HTTPException subclass named NotConnected."""


class InvalidURL(HTTPException):
    """HTTPException subclass named InvalidURL."""


class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in the version attribute."""

    def __init__(self, version):
        self.args = (version,)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass named UnknownTransferEncoding."""


class UnimplementedFileMode(HTTPException):
    """HTTPException subclass named UnimplementedFileMode."""


class IncompleteRead(HTTPException):
    """Carries the data read so far in the partial attribute."""

    def __init__(self, partial):
        self.args = (partial,)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Base class for the connection-state errors below."""


class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass named CannotSendRequest."""


class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass named CannotSendHeader."""


class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass named ResponseNotReady."""


class BadStatusLine(HTTPException):
    """Carries the unparseable status line in the line attribute."""

    def __init__(self, line):
        self.args = (line,)
        self.line = line


# for backwards compatibility
error = HTTPException
1043
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, that
    # line is actually the first line of the body!  Clients need a
    # readable file object that still contains it, so this wrapper
    # serves the saved line first and the underlying file afterwards.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # anything not defined here comes from the wrapped file
        return getattr(self._file, attr)

    def _done(self):
        # Called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object by shadowing them with instance attributes.
        self._line_consumed = 1
        for name in ("read", "readline", "readlines"):
            setattr(self, name, getattr(self._file, name))

    def read(self, amt=None):
        assert not self._line_consumed and self._line_left
        if amt is not None and not amt > self._line_left:
            # the request can be satisfied from the saved line alone
            assert amt <= self._line_left
            start = self._line_offset
            stop = start + amt
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return self._line[start:stop]

        # the request runs past the saved line: hand out the rest of
        # it and take the remainder from the file
        rest = self._line[self._line_offset:]
        self._done()
        if amt is None:
            return rest + self._file.read()
        return rest + self._file.read(amt - len(rest))

    def readline(self):
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        first = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return first + self._file.readlines()
        return first + self._file.readlines(size)
Greg Stein5e0fa402000-06-26 08:28:01 +00001103
1104#
1105# snarfed from httplib.py for now...
1106#
Guido van Rossum23acc951994-02-21 16:36:04 +00001107def test():
Guido van Rossum41999c11997-12-09 00:12:23 +00001108 """Test this module.
1109
1110 The test consists of retrieving and displaying the Python
1111 home page, along with the error code and error string returned
1112 by the www.python.org server.
Guido van Rossum41999c11997-12-09 00:12:23 +00001113 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001114
Guido van Rossum41999c11997-12-09 00:12:23 +00001115 import sys
1116 import getopt
1117 opts, args = getopt.getopt(sys.argv[1:], 'd')
1118 dl = 0
1119 for o, a in opts:
1120 if o == '-d': dl = dl + 1
1121 host = 'www.python.org'
1122 selector = '/'
1123 if args[0:]: host = args[0]
1124 if args[1:]: selector = args[1]
1125 h = HTTP()
1126 h.set_debuglevel(dl)
1127 h.connect(host)
1128 h.putrequest('GET', selector)
1129 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +00001130 status, reason, headers = h.getreply()
1131 print 'status =', status
1132 print 'reason =', reason
Guido van Rossum41999c11997-12-09 00:12:23 +00001133 print
1134 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +00001135 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +00001136 print
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001137 print "read", len(h.getfile().read())
Greg Stein5e0fa402000-06-26 08:28:01 +00001138
Jeremy Hylton8acf1e02002-03-08 19:35:51 +00001139 # minimal test that code to extract host from url works
1140 class HTTP11(HTTP):
1141 _http_vsn = 11
1142 _http_vsn_str = 'HTTP/1.1'
1143
1144 h = HTTP11('www.python.org')
1145 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1146 h.endheaders()
1147 h.getreply()
1148 h.close()
1149
Greg Stein5e0fa402000-06-26 08:28:01 +00001150 if hasattr(socket, 'ssl'):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001151 host = 'sourceforge.net'
Jeremy Hylton42dd01a2001-02-01 23:35:20 +00001152 selector = '/projects/python'
Greg Steindd6eefb2000-07-18 09:09:48 +00001153 hs = HTTPS()
1154 hs.connect(host)
1155 hs.putrequest('GET', selector)
1156 hs.endheaders()
1157 status, reason, headers = hs.getreply()
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001158 # XXX why does this give a 302 response?
Greg Steindd6eefb2000-07-18 09:09:48 +00001159 print 'status =', status
1160 print 'reason =', reason
1161 print
1162 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +00001163 for header in headers.headers: print header.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +00001164 print
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001165 print "read", len(hs.getfile().read())
Guido van Rossum23acc951994-02-21 16:36:04 +00001166
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +00001167
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001168 # Test a buggy server -- returns garbled status line.
1169 # http://www.yahoo.com/promotions/mom_com97/supermom.html
1170 c = HTTPConnection("promotions.yahoo.com")
1171 c.set_debuglevel(1)
1172 c.connect()
1173 c.request("GET", "/promotions/mom_com97/supermom.html")
1174 r = c.getresponse()
1175 print r.status, r.version
1176 lines = r.read().split("\n")
1177 print "\n".join(lines[:5])
1178
1179 c = HTTPConnection("promotions.yahoo.com", strict=1)
1180 c.set_debuglevel(1)
1181 c.connect()
1182 c.request("GET", "/promotions/mom_com97/supermom.html")
1183 try:
1184 r = c.getresponse()
1185 except BadStatusLine, err:
1186 print "strict mode failed as expected"
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001187 print err
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001188 else:
1189 print "XXX strict mode should have failed"
1190
1191 for strict in 0, 1:
1192 h = HTTP(strict=strict)
1193 h.connect("promotions.yahoo.com")
1194 h.putrequest('GET', "/promotions/mom_com97/supermom.html")
1195 h.endheaders()
1196 status, reason, headers = h.getreply()
1197 assert (strict and status == -1) or status == 200, (strict, status)
1198
Guido van Rossum23acc951994-02-21 16:36:04 +00001199if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +00001200 test()