blob: 1db930ea932e6756549ffc078c1186534a18caa0 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000073
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000074try:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000076except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000077 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000078
# Public names exported by a star-import of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000085
# Default TCP ports for plain and SSL-wrapped HTTP.
HTTP_PORT = 80
HTTPS_PORT = 443

# Marker stored in HTTPResponse attributes until the response has been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (values of HTTPConnection.__state; see the table in the
# module docstring for how they combine with __response)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
95
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +000096class HTTPMessage(mimetools.Message):
97
98 def addheader(self, key, value):
99 """Add header for field key handling repeats."""
100 prev = self.dict.get(key)
101 if prev is None:
102 self.dict[key] = value
103 else:
104 combined = ", ".join((prev, value))
105 self.dict[key] = combined
106
107 def addcontinue(self, key, more):
108 """Add more field data from a continuation line."""
109 prev = self.dict[key]
110 self.dict[key] = prev + "\n " + more
111
112 def readheaders(self):
113 """Read header lines.
114
115 Read header lines up to the entirely blank line that terminates them.
116 The (normally blank) line that ends the headers is skipped, but not
117 included in the returned list. If a non-header line ends the headers,
118 (which is an error), an attempt is made to backspace over it; it is
119 never included in the returned list.
120
121 The variable self.status is set to the empty string if all went well,
122 otherwise it is an error message. The variable self.headers is a
123 completely uninterpreted list of lines contained in the header (so
124 printing them will reproduce the header exactly as it appears in the
125 file).
126
127 If multiple header fields with the same name occur, they are combined
128 according to the rules in RFC 2616 sec 4.2:
129
130 Appending each subsequent field-value to the first, each separated
131 by a comma. The order in which header fields with the same field-name
132 are received is significant to the interpretation of the combined
133 field value.
134 """
135 # XXX The implementation overrides the readheaders() method of
136 # rfc822.Message. The base class design isn't amenable to
137 # customized behavior here so the method here is a copy of the
138 # base class code with a few small changes.
139
140 self.dict = {}
141 self.unixfrom = ''
142 self.headers = list = []
143 self.status = ''
144 headerseen = ""
145 firstline = 1
146 startofline = unread = tell = None
147 if hasattr(self.fp, 'unread'):
148 unread = self.fp.unread
149 elif self.seekable:
150 tell = self.fp.tell
151 while 1:
152 if tell:
153 try:
154 startofline = tell()
155 except IOError:
156 startofline = tell = None
157 self.seekable = 0
158 line = self.fp.readline()
159 if not line:
160 self.status = 'EOF in headers'
161 break
162 # Skip unix From name time lines
163 if firstline and line.startswith('From '):
164 self.unixfrom = self.unixfrom + line
165 continue
166 firstline = 0
167 if headerseen and line[0] in ' \t':
168 # XXX Not sure if continuation lines are handled properly
169 # for http and/or for repeating headers
170 # It's a continuation line.
171 list.append(line)
172 x = self.dict[headerseen] + "\n " + line.strip()
173 self.addcontinue(headerseen, line.strip())
174 continue
175 elif self.iscomment(line):
176 # It's a comment. Ignore it.
177 continue
178 elif self.islast(line):
179 # Note! No pushback here! The delimiter line gets eaten.
180 break
181 headerseen = self.isheader(line)
182 if headerseen:
183 # It's a legal header line, save it.
184 list.append(line)
185 self.addheader(headerseen, line[len(headerseen)+1:].strip())
186 continue
187 else:
188 # It's not a header line; throw it back and stop here.
189 if not self.dict:
190 self.status = 'No headers'
191 else:
192 self.status = 'Non-header line where header expected'
193 # Try to undo the read.
194 if unread:
195 unread(line)
196 elif tell:
197 self.fp.seek(startofline)
198 else:
199 self.status = self.status + '; bad seek'
200 break
201
Greg Stein5e0fa402000-06-26 08:28:01 +0000202
203class HTTPResponse:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000204
205 # strict: If true, raise BadStatusLine if the status line can't be
206 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
207 # false because it prvents clients from talking to HTTP/0.9
208 # servers. Note that a response with a sufficiently corrupted
209 # status line will look like an HTTP/0.9 response.
210
211 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
212
213 def __init__(self, sock, debuglevel=0, strict=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000214 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000215 self.debuglevel = debuglevel
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000216 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000217
Greg Steindd6eefb2000-07-18 09:09:48 +0000218 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000219
Greg Steindd6eefb2000-07-18 09:09:48 +0000220 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000221 self.version = _UNKNOWN # HTTP-Version
222 self.status = _UNKNOWN # Status-Code
223 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000224
Tim Peters07e99cb2001-01-14 23:47:14 +0000225 self.chunked = _UNKNOWN # is "chunked" being used?
226 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
227 self.length = _UNKNOWN # number of bytes left in response
228 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000229
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000230 def _read_status(self):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000231 # Initialize with Simple-Response defaults
Greg Stein5e0fa402000-06-26 08:28:01 +0000232 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000233 if self.debuglevel > 0:
234 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000235 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000236 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000237 except ValueError:
238 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000239 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000240 reason = ""
241 except ValueError:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000242 # empty version will cause next test to fail and status
243 # will be treated as 0.9 response.
244 version = ""
245 if not version.startswith('HTTP/'):
246 if self.strict:
247 self.close()
248 raise BadStatusLine(line)
249 else:
250 # assume it's a Simple-Response from an 0.9 server
251 self.fp = LineAndFileWrapper(line, self.fp)
252 return "HTTP/0.9", 200, ""
Greg Stein5e0fa402000-06-26 08:28:01 +0000253
Jeremy Hylton23d40472001-04-13 14:57:08 +0000254 # The status code is a three-digit number
255 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000256 status = int(status)
Jeremy Hylton23d40472001-04-13 14:57:08 +0000257 if status < 100 or status > 999:
258 raise BadStatusLine(line)
259 except ValueError:
260 raise BadStatusLine(line)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000261 return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000262
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000263 def _begin(self):
264 if self.msg is not None:
265 # we've already started reading the response
266 return
267
268 # read until we get a non-100 response
269 while 1:
270 version, status, reason = self._read_status()
271 if status != 100:
272 break
273 # skip the header from the 100 response
274 while 1:
275 skip = self.fp.readline().strip()
276 if not skip:
277 break
278 if self.debuglevel > 0:
279 print "header:", skip
280
281 self.status = status
282 self.reason = reason.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000283 if version == 'HTTP/1.0':
284 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000285 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000286 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000287 elif version == 'HTTP/0.9':
288 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000289 else:
290 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000291
Jeremy Hylton110941a2000-10-12 19:58:36 +0000292 if self.version == 9:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000293 self.chunked = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000294 self.will_close = 1
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000295 self.msg = HTTPMessage(StringIO())
Jeremy Hylton110941a2000-10-12 19:58:36 +0000296 return
297
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000298 self.msg = HTTPMessage(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000299 if self.debuglevel > 0:
300 for hdr in self.msg.headers:
301 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000302
Greg Steindd6eefb2000-07-18 09:09:48 +0000303 # don't let the msg keep an fp
304 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000305
Greg Steindd6eefb2000-07-18 09:09:48 +0000306 # are we using the chunked-style of transfer encoding?
307 tr_enc = self.msg.getheader('transfer-encoding')
308 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000309 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000310 raise UnknownTransferEncoding()
311 self.chunked = 1
312 self.chunk_left = None
313 else:
314 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000315
Greg Steindd6eefb2000-07-18 09:09:48 +0000316 # will the connection close at the end of the response?
317 conn = self.msg.getheader('connection')
318 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000319 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000320 # a "Connection: close" will always close the connection. if we
321 # don't see that and this is not HTTP/1.1, then the connection will
322 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000323 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000324 ( self.version != 11 and \
325 not self.msg.getheader('keep-alive') )
326 else:
327 # for HTTP/1.1, the connection will always remain open
328 # otherwise, it will remain open IFF we see a Keep-Alive header
329 self.will_close = self.version != 11 and \
330 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000331
Greg Steindd6eefb2000-07-18 09:09:48 +0000332 # do we have a Content-Length?
333 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
334 length = self.msg.getheader('content-length')
335 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000336 try:
337 self.length = int(length)
338 except ValueError:
339 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000340 else:
341 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000342
Greg Steindd6eefb2000-07-18 09:09:48 +0000343 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000344 if (status == 204 or # No Content
345 status == 304 or # Not Modified
346 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000347 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000348
Greg Steindd6eefb2000-07-18 09:09:48 +0000349 # if the connection remains open, and we aren't using chunked, and
350 # a content-length was not provided, then assume that the connection
351 # WILL close.
352 if not self.will_close and \
353 not self.chunked and \
354 self.length is None:
355 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000356
Greg Steindd6eefb2000-07-18 09:09:48 +0000357 def close(self):
358 if self.fp:
359 self.fp.close()
360 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000361
Greg Steindd6eefb2000-07-18 09:09:48 +0000362 def isclosed(self):
363 # NOTE: it is possible that we will not ever call self.close(). This
364 # case occurs when will_close is TRUE, length is None, and we
365 # read up to the last byte, but NOT past it.
366 #
367 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
368 # called, meaning self.isclosed() is meaningful.
369 return self.fp is None
370
371 def read(self, amt=None):
372 if self.fp is None:
373 return ''
374
375 if self.chunked:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000376 assert self.chunked != _UNKNOWN
Greg Steindd6eefb2000-07-18 09:09:48 +0000377 chunk_left = self.chunk_left
378 value = ''
379 while 1:
380 if chunk_left is None:
381 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000382 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000383 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000384 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000385 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000386 if chunk_left == 0:
387 break
388 if amt is None:
389 value = value + self._safe_read(chunk_left)
390 elif amt < chunk_left:
391 value = value + self._safe_read(amt)
392 self.chunk_left = chunk_left - amt
393 return value
394 elif amt == chunk_left:
395 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000396 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000397 self.chunk_left = None
398 return value
399 else:
400 value = value + self._safe_read(chunk_left)
401 amt = amt - chunk_left
402
403 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000404 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000405 chunk_left = None
406
407 # read and discard trailer up to the CRLF terminator
408 ### note: we shouldn't have any trailers!
409 while 1:
410 line = self.fp.readline()
411 if line == '\r\n':
412 break
413
414 # we read everything; close the "file"
415 self.close()
416
417 return value
418
419 elif amt is None:
420 # unbounded read
421 if self.will_close:
422 s = self.fp.read()
423 else:
424 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000425 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000426 return s
427
428 if self.length is not None:
429 if amt > self.length:
430 # clip the read to the "end of response"
431 amt = self.length
432 self.length = self.length - amt
433
434 # we do not use _safe_read() here because this may be a .will_close
435 # connection, and the user is reading more bytes than will be provided
436 # (for example, reading in 1k chunks)
437 s = self.fp.read(amt)
438
Greg Steindd6eefb2000-07-18 09:09:48 +0000439 return s
440
441 def _safe_read(self, amt):
442 """Read the number of bytes requested, compensating for partial reads.
443
444 Normally, we have a blocking socket, but a read() can be interrupted
445 by a signal (resulting in a partial read).
446
447 Note that we cannot distinguish between EOF and an interrupt when zero
448 bytes have been read. IncompleteRead() will be raised in this
449 situation.
450
451 This function should be used when <amt> bytes "should" be present for
452 reading. If the bytes are truly not available (due to EOF), then the
453 IncompleteRead exception can be used to detect the problem.
454 """
455 s = ''
456 while amt > 0:
457 chunk = self.fp.read(amt)
458 if not chunk:
459 raise IncompleteRead(s)
460 s = s + chunk
461 amt = amt - len(chunk)
462 return s
463
464 def getheader(self, name, default=None):
465 if self.msg is None:
466 raise ResponseNotReady()
467 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000468
469
470class HTTPConnection:
471
Greg Steindd6eefb2000-07-18 09:09:48 +0000472 _http_vsn = 11
473 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000474
Greg Steindd6eefb2000-07-18 09:09:48 +0000475 response_class = HTTPResponse
476 default_port = HTTP_PORT
477 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000478 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000479 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000480
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000481 def __init__(self, host, port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000482 self.sock = None
483 self.__response = None
484 self.__state = _CS_IDLE
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000485
Greg Steindd6eefb2000-07-18 09:09:48 +0000486 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000487 if strict is not None:
488 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000489
Greg Steindd6eefb2000-07-18 09:09:48 +0000490 def _set_hostport(self, host, port):
491 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000492 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000493 if i >= 0:
Skip Montanaro9d389972002-03-24 16:53:50 +0000494 try:
495 port = int(host[i+1:])
496 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000497 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000498 host = host[:i]
499 else:
500 port = self.default_port
501 self.host = host
502 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000503
Jeremy Hylton30f86742000-09-18 22:50:38 +0000504 def set_debuglevel(self, level):
505 self.debuglevel = level
506
Greg Steindd6eefb2000-07-18 09:09:48 +0000507 def connect(self):
508 """Connect to the host and port specified in __init__."""
Martin v. Löwis2ad25692001-07-31 08:40:21 +0000509 msg = "getaddrinfo returns an empty list"
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000510 for res in socket.getaddrinfo(self.host, self.port, 0,
511 socket.SOCK_STREAM):
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000512 af, socktype, proto, canonname, sa = res
513 try:
514 self.sock = socket.socket(af, socktype, proto)
515 if self.debuglevel > 0:
516 print "connect: (%s, %s)" % (self.host, self.port)
517 self.sock.connect(sa)
518 except socket.error, msg:
519 if self.debuglevel > 0:
520 print 'connect fail:', (self.host, self.port)
Martin v. Löwis322c0d12001-10-07 08:53:32 +0000521 if self.sock:
522 self.sock.close()
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000523 self.sock = None
524 continue
525 break
526 if not self.sock:
527 raise socket.error, msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000528
Greg Steindd6eefb2000-07-18 09:09:48 +0000529 def close(self):
530 """Close the connection to the HTTP server."""
531 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000532 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000533 self.sock = None
534 if self.__response:
535 self.__response.close()
536 self.__response = None
537 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000538
Greg Steindd6eefb2000-07-18 09:09:48 +0000539 def send(self, str):
540 """Send `str' to the server."""
541 if self.sock is None:
542 if self.auto_open:
543 self.connect()
544 else:
545 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000546
Greg Steindd6eefb2000-07-18 09:09:48 +0000547 # send the data to the server. if we get a broken pipe, then close
548 # the socket. we want to reconnect when somebody tries to send again.
549 #
550 # NOTE: we DO propagate the error, though, because we cannot simply
551 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000552 if self.debuglevel > 0:
553 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000554 try:
Martin v. Löwise12454f2002-02-16 23:06:19 +0000555 self.sock.sendall(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000556 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000557 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000558 self.close()
559 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000560
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000561 def putrequest(self, method, url, skip_host=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000562 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000563
Greg Steindd6eefb2000-07-18 09:09:48 +0000564 `method' specifies an HTTP request method, e.g. 'GET'.
565 `url' specifies the object being requested, e.g. '/index.html'.
566 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000567
Greg Steindd6eefb2000-07-18 09:09:48 +0000568 # check if a prior response has been completed
569 if self.__response and self.__response.isclosed():
570 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000571
Greg Steindd6eefb2000-07-18 09:09:48 +0000572 #
573 # in certain cases, we cannot issue another request on this connection.
574 # this occurs when:
575 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
576 # 2) a response to a previous request has signalled that it is going
577 # to close the connection upon completion.
578 # 3) the headers for the previous response have not been read, thus
579 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
580 #
581 # if there is no prior response, then we can request at will.
582 #
583 # if point (2) is true, then we will have passed the socket to the
584 # response (effectively meaning, "there is no prior response"), and
585 # will open a new one when a new request is made.
586 #
587 # Note: if a prior response exists, then we *can* start a new request.
588 # We are not allowed to begin fetching the response to this new
589 # request, however, until that prior response is complete.
590 #
591 if self.__state == _CS_IDLE:
592 self.__state = _CS_REQ_STARTED
593 else:
594 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000595
Greg Steindd6eefb2000-07-18 09:09:48 +0000596 if not url:
597 url = '/'
598 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000599
Greg Steindd6eefb2000-07-18 09:09:48 +0000600 try:
601 self.send(str)
602 except socket.error, v:
603 # trap 'Broken pipe' if we're allowed to automatically reconnect
604 if v[0] != 32 or not self.auto_open:
605 raise
606 # try one more time (the socket was closed; this will reopen)
607 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000608
Greg Steindd6eefb2000-07-18 09:09:48 +0000609 if self._http_vsn == 11:
610 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000611
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000612 if not skip_host:
613 # this header is issued *only* for HTTP/1.1
614 # connections. more specifically, this means it is
615 # only issued when the client uses the new
616 # HTTPConnection() class. backwards-compat clients
617 # will be using HTTP/1.0 and those clients may be
618 # issuing this header themselves. we should NOT issue
619 # it twice; some web servers (such as Apache) barf
620 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000621
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000622 # If we need a non-standard port,include it in the
623 # header. If the request is going through a proxy,
624 # but the host of the actual URL, not the host of the
625 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000626
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000627 netloc = ''
628 if url.startswith('http'):
629 nil, netloc, nil, nil, nil = urlsplit(url)
630
631 if netloc:
632 self.putheader('Host', netloc)
633 elif self.port == HTTP_PORT:
634 self.putheader('Host', self.host)
635 else:
636 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000637
Greg Steindd6eefb2000-07-18 09:09:48 +0000638 # note: we are assuming that clients will not attempt to set these
639 # headers since *this* library must deal with the
640 # consequences. this also means that when the supporting
641 # libraries are updated to recognize other forms, then this
642 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000643
Greg Steindd6eefb2000-07-18 09:09:48 +0000644 # we only want a Content-Encoding of "identity" since we don't
645 # support encodings such as x-gzip or x-deflate.
646 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000647
Greg Steindd6eefb2000-07-18 09:09:48 +0000648 # we can accept "chunked" Transfer-Encodings, but no others
649 # NOTE: no TE header implies *only* "chunked"
650 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000651
Greg Steindd6eefb2000-07-18 09:09:48 +0000652 # if TE is supplied in the header, then it must appear in a
653 # Connection header.
654 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
Greg Steindd6eefb2000-07-18 09:09:48 +0000656 else:
657 # For HTTP/1.0, the server will assume "not chunked"
658 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000659
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 def putheader(self, header, value):
661 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 For example: h.putheader('Accept', 'text/html')
664 """
665 if self.__state != _CS_REQ_STARTED:
666 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000667
Greg Steindd6eefb2000-07-18 09:09:48 +0000668 str = '%s: %s\r\n' % (header, value)
669 self.send(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000670
Greg Steindd6eefb2000-07-18 09:09:48 +0000671 def endheaders(self):
672 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 if self.__state == _CS_REQ_STARTED:
675 self.__state = _CS_REQ_SENT
676 else:
677 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000678
Greg Steindd6eefb2000-07-18 09:09:48 +0000679 self.send('\r\n')
Greg Stein5e0fa402000-06-26 08:28:01 +0000680
Greg Steindd6eefb2000-07-18 09:09:48 +0000681 def request(self, method, url, body=None, headers={}):
682 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000683
Greg Steindd6eefb2000-07-18 09:09:48 +0000684 try:
685 self._send_request(method, url, body, headers)
686 except socket.error, v:
687 # trap 'Broken pipe' if we're allowed to automatically reconnect
688 if v[0] != 32 or not self.auto_open:
689 raise
690 # try one more time
691 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000692
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 def _send_request(self, method, url, body, headers):
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000694 # If headers already contains a host header, then define the
695 # optional skip_host argument to putrequest(). The check is
696 # harder because field names are case insensitive.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000697 if 'Host' in (headers
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000698 or [k for k in headers.iterkeys() if k.lower() == "host"]):
699 self.putrequest(method, url, skip_host=1)
700 else:
701 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000702
Greg Steindd6eefb2000-07-18 09:09:48 +0000703 if body:
704 self.putheader('Content-Length', str(len(body)))
705 for hdr, value in headers.items():
706 self.putheader(hdr, value)
707 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000708
Greg Steindd6eefb2000-07-18 09:09:48 +0000709 if body:
710 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000711
def getresponse(self):
    """Get the response from the server.

    Returns a response_class instance whose _begin() has already been
    called.  Raises ResponseNotReady when no request has been fully
    sent, or when an earlier response on this connection is still
    outstanding.
    """

    # NOTE(review): __response and __state are double-underscore
    # attributes, so they are name-mangled by the enclosing class.

    # A prior response that has been completed no longer blocks us.
    previous = self.__response
    if previous and previous.isclosed():
        self.__response = None

    #
    # Any prior response that still exists must be completed first;
    # otherwise this response's header cannot be read to learn the
    # connection-close behavior.
    #
    # A prior response that was connection-close was already made
    # independent of this HTTPConnection (socket included), because a
    # new request needs a whole new connection.  So a prior response
    # was in one of two states:
    #   1) will_close: this connection was reset; the old socket and
    #                  response operate on their own
    #   2) persistent: the response was retained and we wait for its
    #                  isclosed() status to become true
    #
    ready = self.__state == _CS_REQ_SENT and not self.__response
    if not ready:
        raise ResponseNotReady()

    # The debug level is only passed along when debugging is on.
    if self.debuglevel > 0:
        ctor_args = (self.sock, self.debuglevel)
    else:
        ctor_args = (self.sock,)
    response = self.response_class(strict=self.strict, *ctor_args)

    response._begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if not response.will_close:
        # remember this, so we can tell when it is complete
        self.__response = response
    else:
        # this effectively passes the connection to the response
        self.close()

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000756
# The next several classes are used to define FakeSocket, a socket-like
# interface to an SSL connection.
759
760# The primary complexity comes from faking a makefile() method. The
761# standard socket makefile() implementation calls dup() on the socket
762# file descriptor. As a consequence, clients can call close() on the
763# parent socket and its makefile children in any order. The underlying
764# socket isn't closed until they are all closed.
765
# The implementation uses reference counting to keep the socket open
# until the last client calls close().  SharedSocket keeps track of
# the reference counting and SharedSocketClient provides a constructor
# and close() method that call incref() and decref() correctly.
770
class SharedSocket:
    """Reference-counted holder for a socket used by several clients.

    The wrapped socket is closed when decref() brings the count back
    to zero, and once more (unconditionally) when the holder itself is
    finalized.
    """

    def __init__(self, sock):
        self._refcnt = 0
        self.sock = sock

    def incref(self):
        """Register one more user of the socket."""
        self._refcnt = self._refcnt + 1

    def decref(self):
        """Drop one user; close the socket when no users remain."""
        remaining = self._refcnt - 1
        self._refcnt = remaining
        assert remaining >= 0
        if not remaining:
            self.sock.close()

    def __del__(self):
        # Last-resort cleanup; does not look at the reference count.
        self.sock.close()
788
class SharedSocketClient:
    """Base class for objects holding one reference on a SharedSocket.

    Construction takes the reference; the first close() gives it back.
    """

    def __init__(self, shared):
        self._closed = 0
        self._shared = shared
        shared.incref()
        self._sock = shared.sock

    def close(self):
        """Release the reference once; later calls do nothing."""
        if self._closed:
            return
        self._shared.decref()
        self._closed = 1
        self._shared = None
802
803class SSLFile(SharedSocketClient):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000804 """File-like object wrapping an SSL socket."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000805
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000806 BUFSIZE = 8192
807
808 def __init__(self, sock, ssl, bufsize=None):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +0000809 SharedSocketClient.__init__(self, sock)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000810 self._ssl = ssl
811 self._buf = ''
812 self._bufsize = bufsize or self.__class__.BUFSIZE
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000813
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000814 def _read(self):
815 buf = ''
816 # put in a loop so that we retry on transient errors
Greg Steindd6eefb2000-07-18 09:09:48 +0000817 while 1:
818 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000819 buf = self._ssl.read(self._bufsize)
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000820 except socket.sslerror, err:
821 if (err[0] == socket.SSL_ERROR_WANT_READ
Neal Norwitz22c5d772002-02-11 17:59:51 +0000822 or err[0] == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000823 continue
Martin v. Löwis6af3e2d2002-04-20 07:47:40 +0000824 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
825 or err[0] == socket.SSL_ERROR_EOF):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000826 break
827 raise
828 except socket.error, err:
Tim Petersf3623f32001-10-11 18:15:51 +0000829 if err[0] == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000830 continue
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000831 if err[0] == errno.EBADF:
832 # XXX socket was closed?
833 break
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000834 raise
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000835 else:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000836 break
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000837 return buf
838
839 def read(self, size=None):
840 L = [self._buf]
841 avail = len(self._buf)
842 while size is None or avail < size:
843 s = self._read()
844 if s == '':
845 break
846 L.append(s)
847 avail += len(s)
848 all = "".join(L)
849 if size is None:
850 self._buf = ''
851 return all
852 else:
853 self._buf = all[size:]
854 return all[:size]
855
856 def readline(self):
857 L = [self._buf]
858 self._buf = ''
859 while 1:
860 i = L[-1].find("\n")
861 if i >= 0:
862 break
863 s = self._read()
864 if s == '':
865 break
866 L.append(s)
867 if i == -1:
868 # loop exited because there is no more data
869 return "".join(L)
870 else:
871 all = "".join(L)
872 # XXX could do enough bookkeeping not to do a 2nd search
873 i = all.find("\n") + 1
874 line = all[:i]
875 self._buf = all[i:]
876 return line
877
class FakeSocket(SharedSocketClient):
    """Socket-like front end for an SSL connection.

    send() and recv() go through the SSL object; any attribute not
    defined here is looked up on the underlying socket.
    """

    class _closedsocket:
        # Stand-in installed as _sock by close(): every attribute access
        # fails the way an operation on a closed descriptor would.
        def __getattr__(self, name):
            # Use socket.error explicitly.  A bare "error" in this
            # module is the backwards-compatibility alias for
            # HTTPException, which is not what callers handling a
            # closed socket (e.g. an EBADF check) expect.
            raise socket.error(errno.EBADF, 'Bad file descriptor')

    def __init__(self, sock, ssl):
        sock = SharedSocket(sock)
        SharedSocketClient.__init__(self, sock)
        self._ssl = ssl

    def close(self):
        """Release our reference and disable the delegated socket."""
        SharedSocketClient.close(self)
        self._sock = self.__class__._closedsocket()

    def makefile(self, mode, bufsize=None):
        """Return an SSLFile sharing this connection.

        Only the read modes 'r' and 'rb' are supported; anything else
        raises UnimplementedFileMode.
        """
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()
        return SSLFile(self._shared, self._ssl, bufsize)

    def send(self, stuff, flags = 0):
        """Write stuff through the SSL object; flags is ignored."""
        return self._ssl.write(stuff)

    sendall = send

    def recv(self, len = 1024, flags = 0):
        """Read up to len characters from the SSL object; flags is ignored."""
        return self._ssl.read(len)

    def __getattr__(self, attr):
        # Everything else is delegated to the underlying socket.
        return getattr(self._sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000908
Guido van Rossum23acc951994-02-21 16:36:04 +0000909
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        HTTPConnection.__init__(self, host, port, strict)
        # Both are handed to socket.ssl() by connect().
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))

        # socket.ssl() is given the object stored in the "_sock"
        # attribute when the socket has one, else the socket itself.
        if hasattr(plain, "_sock"):
            raw = plain._sock
        else:
            raw = plain

        secure = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
Greg Stein5e0fa402000-06-26 08:28:01 +0000931
932
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000933class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000934 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000935
Greg Steindd6eefb2000-07-18 09:09:48 +0000936 _http_vsn = 10
937 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000938
Greg Steindd6eefb2000-07-18 09:09:48 +0000939 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000940
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000941 _connection_class = HTTPConnection
942
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000943 def __init__(self, host='', port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000944 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000945
Greg Steindd6eefb2000-07-18 09:09:48 +0000946 # some joker passed 0 explicitly, meaning default port
947 if port == 0:
948 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000949
Greg Steindd6eefb2000-07-18 09:09:48 +0000950 # Note that we may pass an empty string as the host; this will throw
951 # an error when we attempt to connect. Presumably, the client code
952 # will call connect before then, with a proper host.
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000953 self._setup(self._connection_class(host, port, strict))
Greg Stein5e0fa402000-06-26 08:28:01 +0000954
Greg Stein81937a42001-08-18 09:20:23 +0000955 def _setup(self, conn):
956 self._conn = conn
957
958 # set up delegation to flesh out interface
959 self.send = conn.send
960 self.putrequest = conn.putrequest
961 self.endheaders = conn.endheaders
962 self.set_debuglevel = conn.set_debuglevel
963
964 conn._http_vsn = self._http_vsn
965 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000966
Greg Steindd6eefb2000-07-18 09:09:48 +0000967 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000968
Greg Steindd6eefb2000-07-18 09:09:48 +0000969 def connect(self, host=None, port=None):
970 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000971
Greg Steindd6eefb2000-07-18 09:09:48 +0000972 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000973 self._conn._set_hostport(host, port)
974 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000975
Greg Steindd6eefb2000-07-18 09:09:48 +0000976 def getfile(self):
977 "Provide a getfile, since the superclass' does not use this concept."
978 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000979
Greg Steindd6eefb2000-07-18 09:09:48 +0000980 def putheader(self, header, *values):
981 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000982 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000983
Greg Steindd6eefb2000-07-18 09:09:48 +0000984 def getreply(self):
985 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000986
Greg Steindd6eefb2000-07-18 09:09:48 +0000987 Returns a tuple consisting of:
988 - server status code (e.g. '200' if all goes well)
989 - server "reason" corresponding to status code
990 - any RFC822 headers in the response from the server
991 """
992 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000993 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +0000994 except BadStatusLine, e:
995 ### hmm. if getresponse() ever closes the socket on a bad request,
996 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +0000997
Greg Steindd6eefb2000-07-18 09:09:48 +0000998 ### should we keep this behavior? do people use it?
999 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001000 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +00001001
Greg Steindd6eefb2000-07-18 09:09:48 +00001002 # close our socket -- we want to restart after any protocol error
1003 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001004
Greg Steindd6eefb2000-07-18 09:09:48 +00001005 self.headers = None
1006 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +00001007
Greg Steindd6eefb2000-07-18 09:09:48 +00001008 self.headers = response.msg
1009 self.file = response.fp
1010 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +00001011
Greg Steindd6eefb2000-07-18 09:09:48 +00001012 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001013 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001014
Greg Steindd6eefb2000-07-18 09:09:48 +00001015 # note that self.file == response.fp, which gets closed by the
1016 # superclass. just clear the object ref here.
1017 ### hmm. messy. if status==-1, then self.file is owned by us.
1018 ### well... we aren't explicitly closing, but losing this ref will
1019 ### do it
1020 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001021
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # Supplies a default host and passes the X509 key and
            # certificate file names on to the connection.

            # urf. compensate for bad input.
            if port == 0:
                port = None

            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein81937a42001-08-18 09:20:23 +00001047
Greg Stein5e0fa402000-06-26 08:28:01 +00001048
class HTTPException(Exception):
    """Root of this module's exception hierarchy."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """Member of the HTTPException family; adds nothing of its own."""


class InvalidURL(HTTPException):
    """Member of the HTTPException family; adds nothing of its own."""


class UnknownProtocol(HTTPException):
    """Carries the offending value in .version (and in .args)."""

    def __init__(self, version):
        HTTPException.__init__(self, version)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    """Member of the HTTPException family; adds nothing of its own."""


class UnimplementedFileMode(HTTPException):
    """Member of the HTTPException family; adds nothing of its own."""


class IncompleteRead(HTTPException):
    """Carries the data given at construction in .partial (and in .args)."""

    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Common base for the connection-state errors below."""


class CannotSendRequest(ImproperConnectionState):
    """An ImproperConnectionState; adds nothing of its own."""


class CannotSendHeader(ImproperConnectionState):
    """An ImproperConnectionState; adds nothing of its own."""


class ResponseNotReady(ImproperConnectionState):
    """An ImproperConnectionState; adds nothing of its own."""


class BadStatusLine(HTTPException):
    """Carries the offending line in .line (and in .args)."""

    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line


# for backwards compatibility
error = HTTPException
1095
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        # line -- text already read from file, served first
        # file -- the underlying file object, served afterwards
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Anything not defined here comes from the underlying file.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read amt characters; None or a negative amt means everything."""
        if self._line_consumed:
            # Reached through a bound method obtained before _done()
            # rebound the instance attributes: go straight to the file.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        if amt is None or amt < 0 or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None or amt < 0:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            # The request fits within what is left of the saved line.
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the saved line, then lines from the file."""
        if self._line_consumed:
            # See read(): do not hand the saved line out a second time.
            return self._file.readline()
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines."""
        if self._line_consumed:
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
Greg Stein5e0fa402000-06-26 08:28:01 +00001155
Guido van Rossum23acc951994-02-21 16:36:04 +00001156def test():
Guido van Rossum41999c11997-12-09 00:12:23 +00001157 """Test this module.
1158
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001159 A hodge podge of tests collected here, because they have too many
1160 external dependencies for the regular test suite.
Guido van Rossum41999c11997-12-09 00:12:23 +00001161 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001162
Guido van Rossum41999c11997-12-09 00:12:23 +00001163 import sys
1164 import getopt
1165 opts, args = getopt.getopt(sys.argv[1:], 'd')
1166 dl = 0
1167 for o, a in opts:
1168 if o == '-d': dl = dl + 1
1169 host = 'www.python.org'
1170 selector = '/'
1171 if args[0:]: host = args[0]
1172 if args[1:]: selector = args[1]
1173 h = HTTP()
1174 h.set_debuglevel(dl)
1175 h.connect(host)
1176 h.putrequest('GET', selector)
1177 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +00001178 status, reason, headers = h.getreply()
1179 print 'status =', status
1180 print 'reason =', reason
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001181 print "read", len(h.getfile().read())
Guido van Rossum41999c11997-12-09 00:12:23 +00001182 print
1183 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +00001184 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +00001185 print
Greg Stein5e0fa402000-06-26 08:28:01 +00001186
Jeremy Hylton8acf1e02002-03-08 19:35:51 +00001187 # minimal test that code to extract host from url works
1188 class HTTP11(HTTP):
1189 _http_vsn = 11
1190 _http_vsn_str = 'HTTP/1.1'
1191
1192 h = HTTP11('www.python.org')
1193 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1194 h.endheaders()
1195 h.getreply()
1196 h.close()
1197
Greg Stein5e0fa402000-06-26 08:28:01 +00001198 if hasattr(socket, 'ssl'):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001199
1200 for host, selector in (('sourceforge.net', '/projects/python'),
1201 ('dbserv2.theopalgroup.com', '/mediumfile'),
1202 ('dbserv2.theopalgroup.com', '/smallfile'),
1203 ):
1204 print "https://%s%s" % (host, selector)
1205 hs = HTTPS()
1206 hs.connect(host)
1207 hs.putrequest('GET', selector)
1208 hs.endheaders()
1209 status, reason, headers = hs.getreply()
1210 print 'status =', status
1211 print 'reason =', reason
1212 print "read", len(hs.getfile().read())
1213 print
1214 if headers:
1215 for header in headers.headers: print header.strip()
1216 print
Guido van Rossum23acc951994-02-21 16:36:04 +00001217
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001218 return
Guido van Rossuma0dfc7a1995-09-07 19:28:19 +00001219
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001220 # Test a buggy server -- returns garbled status line.
1221 # http://www.yahoo.com/promotions/mom_com97/supermom.html
1222 c = HTTPConnection("promotions.yahoo.com")
1223 c.set_debuglevel(1)
1224 c.connect()
1225 c.request("GET", "/promotions/mom_com97/supermom.html")
1226 r = c.getresponse()
1227 print r.status, r.version
1228 lines = r.read().split("\n")
1229 print "\n".join(lines[:5])
1230
1231 c = HTTPConnection("promotions.yahoo.com", strict=1)
1232 c.set_debuglevel(1)
1233 c.connect()
1234 c.request("GET", "/promotions/mom_com97/supermom.html")
1235 try:
1236 r = c.getresponse()
1237 except BadStatusLine, err:
1238 print "strict mode failed as expected"
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001239 print err
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001240 else:
1241 print "XXX strict mode should have failed"
1242
1243 for strict in 0, 1:
1244 h = HTTP(strict=strict)
1245 h.connect("promotions.yahoo.com")
1246 h.putrequest('GET', "/promotions/mom_com97/supermom.html")
1247 h.endheaders()
1248 status, reason, headers = h.getreply()
1249 assert (strict and status == -1) or status == 200, (strict, status)
1250
Guido van Rossum23acc951994-02-21 16:36:04 +00001251if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +00001252 test()