blob: e58a30a12e9c9096142af0ef3360b0ed09506850 [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000073
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000074try:
Greg Steindd6eefb2000-07-18 09:09:48 +000075 from cStringIO import StringIO
Greg Stein5e0fa402000-06-26 08:28:01 +000076except ImportError:
Greg Steindd6eefb2000-07-18 09:09:48 +000077 from StringIO import StringIO
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000078
# Public names exported by "from <this module> import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error"]

# Port numbers; HTTP_PORT is the default_port of HTTPConnection.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until begin() has parsed
# the status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states (see the state table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
95
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +000096class HTTPMessage(mimetools.Message):
97
98 def addheader(self, key, value):
99 """Add header for field key handling repeats."""
100 prev = self.dict.get(key)
101 if prev is None:
102 self.dict[key] = value
103 else:
104 combined = ", ".join((prev, value))
105 self.dict[key] = combined
106
107 def addcontinue(self, key, more):
108 """Add more field data from a continuation line."""
109 prev = self.dict[key]
110 self.dict[key] = prev + "\n " + more
111
112 def readheaders(self):
113 """Read header lines.
114
115 Read header lines up to the entirely blank line that terminates them.
116 The (normally blank) line that ends the headers is skipped, but not
117 included in the returned list. If a non-header line ends the headers,
118 (which is an error), an attempt is made to backspace over it; it is
119 never included in the returned list.
120
121 The variable self.status is set to the empty string if all went well,
122 otherwise it is an error message. The variable self.headers is a
123 completely uninterpreted list of lines contained in the header (so
124 printing them will reproduce the header exactly as it appears in the
125 file).
126
127 If multiple header fields with the same name occur, they are combined
128 according to the rules in RFC 2616 sec 4.2:
129
130 Appending each subsequent field-value to the first, each separated
131 by a comma. The order in which header fields with the same field-name
132 are received is significant to the interpretation of the combined
133 field value.
134 """
135 # XXX The implementation overrides the readheaders() method of
136 # rfc822.Message. The base class design isn't amenable to
137 # customized behavior here so the method here is a copy of the
138 # base class code with a few small changes.
139
140 self.dict = {}
141 self.unixfrom = ''
142 self.headers = list = []
143 self.status = ''
144 headerseen = ""
145 firstline = 1
146 startofline = unread = tell = None
147 if hasattr(self.fp, 'unread'):
148 unread = self.fp.unread
149 elif self.seekable:
150 tell = self.fp.tell
151 while 1:
152 if tell:
153 try:
154 startofline = tell()
155 except IOError:
156 startofline = tell = None
157 self.seekable = 0
158 line = self.fp.readline()
159 if not line:
160 self.status = 'EOF in headers'
161 break
162 # Skip unix From name time lines
163 if firstline and line.startswith('From '):
164 self.unixfrom = self.unixfrom + line
165 continue
166 firstline = 0
167 if headerseen and line[0] in ' \t':
168 # XXX Not sure if continuation lines are handled properly
169 # for http and/or for repeating headers
170 # It's a continuation line.
171 list.append(line)
172 x = self.dict[headerseen] + "\n " + line.strip()
173 self.addcontinue(headerseen, line.strip())
174 continue
175 elif self.iscomment(line):
176 # It's a comment. Ignore it.
177 continue
178 elif self.islast(line):
179 # Note! No pushback here! The delimiter line gets eaten.
180 break
181 headerseen = self.isheader(line)
182 if headerseen:
183 # It's a legal header line, save it.
184 list.append(line)
185 self.addheader(headerseen, line[len(headerseen)+1:].strip())
186 continue
187 else:
188 # It's not a header line; throw it back and stop here.
189 if not self.dict:
190 self.status = 'No headers'
191 else:
192 self.status = 'Non-header line where header expected'
193 # Try to undo the read.
194 if unread:
195 unread(line)
196 elif tell:
197 self.fp.seek(startofline)
198 else:
199 self.status = self.status + '; bad seek'
200 break
Greg Stein5e0fa402000-06-26 08:28:01 +0000201
202class HTTPResponse:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000203
204 # strict: If true, raise BadStatusLine if the status line can't be
205 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
Skip Montanaro186bec22002-07-25 16:10:38 +0000206 # false because it prevents clients from talking to HTTP/0.9
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000207 # servers. Note that a response with a sufficiently corrupted
208 # status line will look like an HTTP/0.9 response.
209
210 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
211
212 def __init__(self, sock, debuglevel=0, strict=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000213 self.fp = sock.makefile('rb', 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000214 self.debuglevel = debuglevel
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000215 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000216
Greg Steindd6eefb2000-07-18 09:09:48 +0000217 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000218
Greg Steindd6eefb2000-07-18 09:09:48 +0000219 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000220 self.version = _UNKNOWN # HTTP-Version
221 self.status = _UNKNOWN # Status-Code
222 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000223
Tim Peters07e99cb2001-01-14 23:47:14 +0000224 self.chunked = _UNKNOWN # is "chunked" being used?
225 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
226 self.length = _UNKNOWN # number of bytes left in response
227 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000228
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000229 def _read_status(self):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000230 # Initialize with Simple-Response defaults
Greg Stein5e0fa402000-06-26 08:28:01 +0000231 line = self.fp.readline()
Jeremy Hylton30f86742000-09-18 22:50:38 +0000232 if self.debuglevel > 0:
233 print "reply:", repr(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000234 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000235 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000236 except ValueError:
237 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000238 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000239 reason = ""
240 except ValueError:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000241 # empty version will cause next test to fail and status
242 # will be treated as 0.9 response.
243 version = ""
244 if not version.startswith('HTTP/'):
245 if self.strict:
246 self.close()
247 raise BadStatusLine(line)
248 else:
249 # assume it's a Simple-Response from an 0.9 server
250 self.fp = LineAndFileWrapper(line, self.fp)
251 return "HTTP/0.9", 200, ""
Greg Stein5e0fa402000-06-26 08:28:01 +0000252
Jeremy Hylton23d40472001-04-13 14:57:08 +0000253 # The status code is a three-digit number
254 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000255 status = int(status)
Jeremy Hylton23d40472001-04-13 14:57:08 +0000256 if status < 100 or status > 999:
257 raise BadStatusLine(line)
258 except ValueError:
259 raise BadStatusLine(line)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000260 return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000261
Jeremy Hylton39c03802002-07-12 14:04:09 +0000262 def begin(self):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000263 if self.msg is not None:
264 # we've already started reading the response
265 return
266
267 # read until we get a non-100 response
268 while 1:
269 version, status, reason = self._read_status()
270 if status != 100:
271 break
272 # skip the header from the 100 response
273 while 1:
274 skip = self.fp.readline().strip()
275 if not skip:
276 break
277 if self.debuglevel > 0:
278 print "header:", skip
Tim Petersc411dba2002-07-16 21:35:23 +0000279
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000280 self.status = status
281 self.reason = reason.strip()
Greg Steindd6eefb2000-07-18 09:09:48 +0000282 if version == 'HTTP/1.0':
283 self.version = 10
Jeremy Hylton110941a2000-10-12 19:58:36 +0000284 elif version.startswith('HTTP/1.'):
Tim Peters07e99cb2001-01-14 23:47:14 +0000285 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hylton110941a2000-10-12 19:58:36 +0000286 elif version == 'HTTP/0.9':
287 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000288 else:
289 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000290
Jeremy Hylton110941a2000-10-12 19:58:36 +0000291 if self.version == 9:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000292 self.chunked = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000293 self.will_close = 1
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000294 self.msg = HTTPMessage(StringIO())
Jeremy Hylton110941a2000-10-12 19:58:36 +0000295 return
296
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000297 self.msg = HTTPMessage(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000298 if self.debuglevel > 0:
299 for hdr in self.msg.headers:
300 print "header:", hdr,
Greg Stein5e0fa402000-06-26 08:28:01 +0000301
Greg Steindd6eefb2000-07-18 09:09:48 +0000302 # don't let the msg keep an fp
303 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000304
Greg Steindd6eefb2000-07-18 09:09:48 +0000305 # are we using the chunked-style of transfer encoding?
306 tr_enc = self.msg.getheader('transfer-encoding')
307 if tr_enc:
Guido van Rossum34735a62000-12-15 15:09:42 +0000308 if tr_enc.lower() != 'chunked':
Greg Steindd6eefb2000-07-18 09:09:48 +0000309 raise UnknownTransferEncoding()
310 self.chunked = 1
311 self.chunk_left = None
312 else:
313 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000314
Greg Steindd6eefb2000-07-18 09:09:48 +0000315 # will the connection close at the end of the response?
316 conn = self.msg.getheader('connection')
317 if conn:
Guido van Rossum34735a62000-12-15 15:09:42 +0000318 conn = conn.lower()
Greg Steindd6eefb2000-07-18 09:09:48 +0000319 # a "Connection: close" will always close the connection. if we
320 # don't see that and this is not HTTP/1.1, then the connection will
321 # close unless we see a Keep-Alive header.
Guido van Rossum34735a62000-12-15 15:09:42 +0000322 self.will_close = conn.find('close') != -1 or \
Greg Steindd6eefb2000-07-18 09:09:48 +0000323 ( self.version != 11 and \
324 not self.msg.getheader('keep-alive') )
325 else:
326 # for HTTP/1.1, the connection will always remain open
327 # otherwise, it will remain open IFF we see a Keep-Alive header
328 self.will_close = self.version != 11 and \
329 not self.msg.getheader('keep-alive')
Greg Stein5e0fa402000-06-26 08:28:01 +0000330
Greg Steindd6eefb2000-07-18 09:09:48 +0000331 # do we have a Content-Length?
332 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
333 length = self.msg.getheader('content-length')
334 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000335 try:
336 self.length = int(length)
337 except ValueError:
338 self.length = None
Greg Steindd6eefb2000-07-18 09:09:48 +0000339 else:
340 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000341
Greg Steindd6eefb2000-07-18 09:09:48 +0000342 # does the body have a fixed length? (of zero)
Tim Peters07e99cb2001-01-14 23:47:14 +0000343 if (status == 204 or # No Content
344 status == 304 or # Not Modified
345 100 <= status < 200): # 1xx codes
Greg Steindd6eefb2000-07-18 09:09:48 +0000346 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000347
Greg Steindd6eefb2000-07-18 09:09:48 +0000348 # if the connection remains open, and we aren't using chunked, and
349 # a content-length was not provided, then assume that the connection
350 # WILL close.
351 if not self.will_close and \
352 not self.chunked and \
353 self.length is None:
354 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000355
Greg Steindd6eefb2000-07-18 09:09:48 +0000356 def close(self):
357 if self.fp:
358 self.fp.close()
359 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000360
Greg Steindd6eefb2000-07-18 09:09:48 +0000361 def isclosed(self):
362 # NOTE: it is possible that we will not ever call self.close(). This
363 # case occurs when will_close is TRUE, length is None, and we
364 # read up to the last byte, but NOT past it.
365 #
366 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
367 # called, meaning self.isclosed() is meaningful.
368 return self.fp is None
369
370 def read(self, amt=None):
371 if self.fp is None:
372 return ''
373
374 if self.chunked:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000375 assert self.chunked != _UNKNOWN
Greg Steindd6eefb2000-07-18 09:09:48 +0000376 chunk_left = self.chunk_left
377 value = ''
378 while 1:
379 if chunk_left is None:
380 line = self.fp.readline()
Guido van Rossum34735a62000-12-15 15:09:42 +0000381 i = line.find(';')
Greg Steindd6eefb2000-07-18 09:09:48 +0000382 if i >= 0:
Tim Peters07e99cb2001-01-14 23:47:14 +0000383 line = line[:i] # strip chunk-extensions
Guido van Rossum34735a62000-12-15 15:09:42 +0000384 chunk_left = int(line, 16)
Greg Steindd6eefb2000-07-18 09:09:48 +0000385 if chunk_left == 0:
386 break
387 if amt is None:
388 value = value + self._safe_read(chunk_left)
389 elif amt < chunk_left:
390 value = value + self._safe_read(amt)
391 self.chunk_left = chunk_left - amt
392 return value
393 elif amt == chunk_left:
394 value = value + self._safe_read(amt)
Tim Peters07e99cb2001-01-14 23:47:14 +0000395 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000396 self.chunk_left = None
397 return value
398 else:
399 value = value + self._safe_read(chunk_left)
400 amt = amt - chunk_left
401
402 # we read the whole chunk, get another
Tim Peters07e99cb2001-01-14 23:47:14 +0000403 self._safe_read(2) # toss the CRLF at the end of the chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000404 chunk_left = None
405
406 # read and discard trailer up to the CRLF terminator
407 ### note: we shouldn't have any trailers!
408 while 1:
409 line = self.fp.readline()
410 if line == '\r\n':
411 break
412
413 # we read everything; close the "file"
414 self.close()
415
416 return value
417
418 elif amt is None:
419 # unbounded read
420 if self.will_close:
421 s = self.fp.read()
422 else:
423 s = self._safe_read(self.length)
Tim Peters07e99cb2001-01-14 23:47:14 +0000424 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000425 return s
426
427 if self.length is not None:
428 if amt > self.length:
429 # clip the read to the "end of response"
430 amt = self.length
431 self.length = self.length - amt
432
433 # we do not use _safe_read() here because this may be a .will_close
434 # connection, and the user is reading more bytes than will be provided
435 # (for example, reading in 1k chunks)
436 s = self.fp.read(amt)
437
Greg Steindd6eefb2000-07-18 09:09:48 +0000438 return s
439
440 def _safe_read(self, amt):
441 """Read the number of bytes requested, compensating for partial reads.
442
443 Normally, we have a blocking socket, but a read() can be interrupted
444 by a signal (resulting in a partial read).
445
446 Note that we cannot distinguish between EOF and an interrupt when zero
447 bytes have been read. IncompleteRead() will be raised in this
448 situation.
449
450 This function should be used when <amt> bytes "should" be present for
451 reading. If the bytes are truly not available (due to EOF), then the
452 IncompleteRead exception can be used to detect the problem.
453 """
454 s = ''
455 while amt > 0:
456 chunk = self.fp.read(amt)
457 if not chunk:
458 raise IncompleteRead(s)
459 s = s + chunk
460 amt = amt - len(chunk)
461 return s
462
463 def getheader(self, name, default=None):
464 if self.msg is None:
465 raise ResponseNotReady()
466 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000467
468
469class HTTPConnection:
470
Greg Steindd6eefb2000-07-18 09:09:48 +0000471 _http_vsn = 11
472 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000473
Greg Steindd6eefb2000-07-18 09:09:48 +0000474 response_class = HTTPResponse
475 default_port = HTTP_PORT
476 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000477 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000478 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000479
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000480 def __init__(self, host, port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000481 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000482 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000483 self.__response = None
484 self.__state = _CS_IDLE
Tim Petersc411dba2002-07-16 21:35:23 +0000485
Greg Steindd6eefb2000-07-18 09:09:48 +0000486 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000487 if strict is not None:
488 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000489
Greg Steindd6eefb2000-07-18 09:09:48 +0000490 def _set_hostport(self, host, port):
491 if port is None:
Guido van Rossum34735a62000-12-15 15:09:42 +0000492 i = host.find(':')
Greg Steindd6eefb2000-07-18 09:09:48 +0000493 if i >= 0:
Skip Montanaro9d389972002-03-24 16:53:50 +0000494 try:
495 port = int(host[i+1:])
496 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000497 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000498 host = host[:i]
499 else:
500 port = self.default_port
501 self.host = host
502 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000503
Jeremy Hylton30f86742000-09-18 22:50:38 +0000504 def set_debuglevel(self, level):
505 self.debuglevel = level
506
Greg Steindd6eefb2000-07-18 09:09:48 +0000507 def connect(self):
508 """Connect to the host and port specified in __init__."""
Martin v. Löwis2ad25692001-07-31 08:40:21 +0000509 msg = "getaddrinfo returns an empty list"
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000510 for res in socket.getaddrinfo(self.host, self.port, 0,
511 socket.SOCK_STREAM):
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000512 af, socktype, proto, canonname, sa = res
513 try:
514 self.sock = socket.socket(af, socktype, proto)
515 if self.debuglevel > 0:
516 print "connect: (%s, %s)" % (self.host, self.port)
517 self.sock.connect(sa)
518 except socket.error, msg:
519 if self.debuglevel > 0:
520 print 'connect fail:', (self.host, self.port)
Martin v. Löwis322c0d12001-10-07 08:53:32 +0000521 if self.sock:
522 self.sock.close()
Martin v. Löwis4eb59402001-07-26 13:37:33 +0000523 self.sock = None
524 continue
525 break
526 if not self.sock:
527 raise socket.error, msg
Greg Stein5e0fa402000-06-26 08:28:01 +0000528
Greg Steindd6eefb2000-07-18 09:09:48 +0000529 def close(self):
530 """Close the connection to the HTTP server."""
531 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000532 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000533 self.sock = None
534 if self.__response:
535 self.__response.close()
536 self.__response = None
537 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000538
Greg Steindd6eefb2000-07-18 09:09:48 +0000539 def send(self, str):
540 """Send `str' to the server."""
541 if self.sock is None:
542 if self.auto_open:
543 self.connect()
544 else:
545 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000546
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000547 # send the data to the server. if we get a broken pipe, then close
Greg Steindd6eefb2000-07-18 09:09:48 +0000548 # the socket. we want to reconnect when somebody tries to send again.
549 #
550 # NOTE: we DO propagate the error, though, because we cannot simply
551 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000552 if self.debuglevel > 0:
553 print "send:", repr(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000554 try:
Martin v. Löwise12454f2002-02-16 23:06:19 +0000555 self.sock.sendall(str)
Greg Steindd6eefb2000-07-18 09:09:48 +0000556 except socket.error, v:
Tim Peters07e99cb2001-01-14 23:47:14 +0000557 if v[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000558 self.close()
559 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000560
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000561 def _output(self, s):
562 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000563
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000564 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000565 """
566 self._buffer.append(s)
567
568 def _send_output(self):
569 """Send the currently buffered request and clear the buffer.
570
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000571 Appends an extra \\r\\n to the buffer.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000572 """
573 self._buffer.extend(("", ""))
574 msg = "\r\n".join(self._buffer)
575 del self._buffer[:]
576 self.send(msg)
577
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000578 def putrequest(self, method, url, skip_host=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000579 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000580
Greg Steindd6eefb2000-07-18 09:09:48 +0000581 `method' specifies an HTTP request method, e.g. 'GET'.
582 `url' specifies the object being requested, e.g. '/index.html'.
583 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000584
Greg Steindd6eefb2000-07-18 09:09:48 +0000585 # check if a prior response has been completed
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000586 # XXX What if it hasn't?
Greg Steindd6eefb2000-07-18 09:09:48 +0000587 if self.__response and self.__response.isclosed():
588 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000589
Greg Steindd6eefb2000-07-18 09:09:48 +0000590 #
591 # in certain cases, we cannot issue another request on this connection.
592 # this occurs when:
593 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
594 # 2) a response to a previous request has signalled that it is going
595 # to close the connection upon completion.
596 # 3) the headers for the previous response have not been read, thus
597 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
598 #
599 # if there is no prior response, then we can request at will.
600 #
601 # if point (2) is true, then we will have passed the socket to the
602 # response (effectively meaning, "there is no prior response"), and
603 # will open a new one when a new request is made.
604 #
605 # Note: if a prior response exists, then we *can* start a new request.
606 # We are not allowed to begin fetching the response to this new
607 # request, however, until that prior response is complete.
608 #
609 if self.__state == _CS_IDLE:
610 self.__state = _CS_REQ_STARTED
611 else:
612 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000613
Greg Steindd6eefb2000-07-18 09:09:48 +0000614 if not url:
615 url = '/'
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000616 str = '%s %s %s' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000617
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000618 self._output(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000619
Greg Steindd6eefb2000-07-18 09:09:48 +0000620 if self._http_vsn == 11:
621 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000622
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000623 if not skip_host:
624 # this header is issued *only* for HTTP/1.1
625 # connections. more specifically, this means it is
626 # only issued when the client uses the new
627 # HTTPConnection() class. backwards-compat clients
628 # will be using HTTP/1.0 and those clients may be
629 # issuing this header themselves. we should NOT issue
630 # it twice; some web servers (such as Apache) barf
631 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000632
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000633 # If we need a non-standard port,include it in the
634 # header. If the request is going through a proxy,
635 # but the host of the actual URL, not the host of the
636 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000637
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000638 netloc = ''
639 if url.startswith('http'):
640 nil, netloc, nil, nil, nil = urlsplit(url)
641
642 if netloc:
643 self.putheader('Host', netloc)
644 elif self.port == HTTP_PORT:
645 self.putheader('Host', self.host)
646 else:
647 self.putheader('Host', "%s:%s" % (self.host, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000648
Greg Steindd6eefb2000-07-18 09:09:48 +0000649 # note: we are assuming that clients will not attempt to set these
650 # headers since *this* library must deal with the
651 # consequences. this also means that when the supporting
652 # libraries are updated to recognize other forms, then this
653 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000654
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 # we only want a Content-Encoding of "identity" since we don't
656 # support encodings such as x-gzip or x-deflate.
657 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000658
Greg Steindd6eefb2000-07-18 09:09:48 +0000659 # we can accept "chunked" Transfer-Encodings, but no others
660 # NOTE: no TE header implies *only* "chunked"
661 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 # if TE is supplied in the header, then it must appear in a
664 # Connection header.
665 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
Greg Steindd6eefb2000-07-18 09:09:48 +0000667 else:
668 # For HTTP/1.0, the server will assume "not chunked"
669 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000670
Greg Steindd6eefb2000-07-18 09:09:48 +0000671 def putheader(self, header, value):
672 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 For example: h.putheader('Accept', 'text/html')
675 """
676 if self.__state != _CS_REQ_STARTED:
677 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000678
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000679 str = '%s: %s' % (header, value)
680 self._output(str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000681
Greg Steindd6eefb2000-07-18 09:09:48 +0000682 def endheaders(self):
683 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000684
Greg Steindd6eefb2000-07-18 09:09:48 +0000685 if self.__state == _CS_REQ_STARTED:
686 self.__state = _CS_REQ_SENT
687 else:
688 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000689
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000690 self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000691
Greg Steindd6eefb2000-07-18 09:09:48 +0000692 def request(self, method, url, body=None, headers={}):
693 """Send a complete request to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000694
Greg Steindd6eefb2000-07-18 09:09:48 +0000695 try:
696 self._send_request(method, url, body, headers)
697 except socket.error, v:
698 # trap 'Broken pipe' if we're allowed to automatically reconnect
699 if v[0] != 32 or not self.auto_open:
700 raise
701 # try one more time
702 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000703
Greg Steindd6eefb2000-07-18 09:09:48 +0000704 def _send_request(self, method, url, body, headers):
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000705 # If headers already contains a host header, then define the
706 # optional skip_host argument to putrequest(). The check is
707 # harder because field names are case insensitive.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000708 if 'Host' in (headers
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000709 or [k for k in headers.iterkeys() if k.lower() == "host"]):
710 self.putrequest(method, url, skip_host=1)
711 else:
712 self.putrequest(method, url)
Greg Stein5e0fa402000-06-26 08:28:01 +0000713
Greg Steindd6eefb2000-07-18 09:09:48 +0000714 if body:
715 self.putheader('Content-Length', str(len(body)))
716 for hdr, value in headers.items():
717 self.putheader(hdr, value)
718 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000719
Greg Steindd6eefb2000-07-18 09:09:48 +0000720 if body:
721 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000722
def getresponse(self):
    "Read the pending response's status and headers and return the response."

    # drop our reference to an earlier response once it has been fully read
    earlier = self.__response
    if earlier and earlier.isclosed():
        self.__response = None

    #
    # An earlier response that is still around must be finished first;
    # until then we cannot read the headers of this response to learn its
    # connection-close behavior.
    #
    # note: an earlier response that was connection-close had its socket
    #       and response object detached from this HTTPConnection, because
    #       a new request needs a brand new connection anyway
    #
    # so the earlier response is in one of two situations:
    #   1) will_close: this connection was reset; the old socket and
    #                  response live on independently
    #   2) persistent: we kept the response and wait for its isclosed()
    #                  to turn true
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    # the debug level is only handed to the response class when it is set
    if self.debuglevel <= 0:
        response = self.response_class(self.sock, strict=self.strict)
    else:
        response = self.response_class(self.sock, self.debuglevel,
                                       strict=self.strict)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if not response.will_close:
        # keep a reference so we can tell when the body has been consumed
        self.__response = response
    else:
        # closing here effectively hands the connection to the response
        self.close()

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000767
# The next several classes are used to define FakeSocket, a socket-like
# interface to an SSL connection.
770
771# The primary complexity comes from faking a makefile() method. The
772# standard socket makefile() implementation calls dup() on the socket
773# file descriptor. As a consequence, clients can call close() on the
774# parent socket and its makefile children in any order. The underlying
775# socket isn't closed until they are all closed.
776
777# The implementation uses reference counting to keep the socket open
778# until the last client calls close(). SharedSocket keeps track of
# the reference counting and SharedSocketClient provides a constructor
# and close() method that call incref() and decref() correctly.
781
class SharedSocket:
    """Reference-counted holder for a socket used by several clients.

    The wrapped socket is closed when decref() brings the count back to
    zero, and again when the holder itself is deleted.
    """

    def __init__(self, sock):
        self.sock = sock
        self._refcnt = 0

    def incref(self):
        "Record one more user of the socket."
        self._refcnt = self._refcnt + 1

    def decref(self):
        "Record one user less; close the socket when nobody is left."
        remaining = self._refcnt - 1
        self._refcnt = remaining
        assert remaining >= 0
        if remaining == 0:
            self.sock.close()

    def __del__(self):
        # close the socket when the holder goes away
        self.sock.close()
799
class SharedSocketClient:
    """Base class for objects that hold one reference to a shared socket.

    The constructor takes a reference with incref(); close() gives it back
    with decref() exactly once, however often close() is called.
    """

    def __init__(self, shared):
        self._closed = 0
        self._shared = shared
        shared.incref()
        self._sock = shared.sock

    def close(self):
        "Release this client's reference; later calls do nothing."
        if self._closed:
            return
        self._shared.decref()
        self._closed = 1
        self._shared = None
813
814class SSLFile(SharedSocketClient):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000815 """File-like object wrapping an SSL socket."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000816
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000817 BUFSIZE = 8192
Tim Petersc411dba2002-07-16 21:35:23 +0000818
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000819 def __init__(self, sock, ssl, bufsize=None):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +0000820 SharedSocketClient.__init__(self, sock)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000821 self._ssl = ssl
822 self._buf = ''
823 self._bufsize = bufsize or self.__class__.BUFSIZE
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000824
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000825 def _read(self):
826 buf = ''
827 # put in a loop so that we retry on transient errors
Greg Steindd6eefb2000-07-18 09:09:48 +0000828 while 1:
829 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000830 buf = self._ssl.read(self._bufsize)
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000831 except socket.sslerror, err:
832 if (err[0] == socket.SSL_ERROR_WANT_READ
Neal Norwitz22c5d772002-02-11 17:59:51 +0000833 or err[0] == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000834 continue
Martin v. Löwis6af3e2d2002-04-20 07:47:40 +0000835 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
836 or err[0] == socket.SSL_ERROR_EOF):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000837 break
838 raise
839 except socket.error, err:
Tim Petersf3623f32001-10-11 18:15:51 +0000840 if err[0] == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000841 continue
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000842 if err[0] == errno.EBADF:
843 # XXX socket was closed?
844 break
Jeremy Hylton6459c8d2001-10-11 17:47:22 +0000845 raise
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000846 else:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +0000847 break
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000848 return buf
849
850 def read(self, size=None):
851 L = [self._buf]
852 avail = len(self._buf)
853 while size is None or avail < size:
854 s = self._read()
855 if s == '':
856 break
857 L.append(s)
858 avail += len(s)
859 all = "".join(L)
860 if size is None:
861 self._buf = ''
862 return all
863 else:
864 self._buf = all[size:]
865 return all[:size]
866
867 def readline(self):
868 L = [self._buf]
869 self._buf = ''
870 while 1:
871 i = L[-1].find("\n")
872 if i >= 0:
873 break
874 s = self._read()
875 if s == '':
876 break
877 L.append(s)
878 if i == -1:
879 # loop exited because there is no more data
880 return "".join(L)
881 else:
882 all = "".join(L)
883 # XXX could do enough bookkeeping not to do a 2nd search
884 i = all.find("\n") + 1
885 line = all[:i]
886 self._buf = all[i:]
887 return line
888
class FakeSocket(SharedSocketClient):
    """Socket-like wrapper that sends and receives through an SSL object."""

    class _closedsocket:
        "Stand-in for the socket after close(); any attribute access fails."
        def __getattr__(self, name):
            # NOTE(review): `error` is the name visible at module level
            # when this runs -- confirm it is the intended exception type.
            raise error(9, 'Bad file descriptor')

    def __init__(self, sock, ssl):
        # wrap the socket so that makefile() children can share it
        shared = SharedSocket(sock)
        SharedSocketClient.__init__(self, shared)
        self._ssl = ssl

    def close(self):
        "Release the shared socket and replace it with a closed stand-in."
        SharedSocketClient.close(self)
        self._sock = self.__class__._closedsocket()

    def makefile(self, mode, bufsize=None):
        "Return an SSLFile for reading; only modes 'r' and 'rb' are allowed."
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()
        return SSLFile(self._shared, self._ssl, bufsize)

    def send(self, stuff, flags = 0):
        "Write `stuff` to the SSL object; `flags` is accepted but unused."
        return self._ssl.write(stuff)

    sendall = send

    def recv(self, len = 1024, flags = 0):
        "Read up to `len` from the SSL object; `flags` is accepted but unused."
        return self._ssl.read(len)

    def __getattr__(self, attr):
        # everything not defined here is looked up on the wrapped socket
        return getattr(self._sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +0000919
Guido van Rossum23acc951994-02-21 16:36:04 +0000920
class HTTPSConnection(HTTPConnection):
    "HTTPConnection variant that communicates via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        HTTPConnection.__init__(self, host, port, strict)
        # kept for connect(), which passes them to socket.ssl()
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to self.host on self.port and set up SSL on the socket."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # socket.ssl() is given the object stored in `_sock` when the
        # socket object has such an attribute, else the socket itself
        if hasattr(plain, "_sock"):
            raw = plain._sock
        else:
            raw = plain
        ssl = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +0000942
943
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000944class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +0000945 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +0000946
Greg Steindd6eefb2000-07-18 09:09:48 +0000947 _http_vsn = 10
948 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +0000949
Greg Steindd6eefb2000-07-18 09:09:48 +0000950 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000951
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000952 _connection_class = HTTPConnection
953
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000954 def __init__(self, host='', port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000955 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +0000956
Greg Steindd6eefb2000-07-18 09:09:48 +0000957 # some joker passed 0 explicitly, meaning default port
958 if port == 0:
959 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000960
Greg Steindd6eefb2000-07-18 09:09:48 +0000961 # Note that we may pass an empty string as the host; this will throw
962 # an error when we attempt to connect. Presumably, the client code
963 # will call connect before then, with a proper host.
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000964 self._setup(self._connection_class(host, port, strict))
Greg Stein5e0fa402000-06-26 08:28:01 +0000965
Greg Stein81937a42001-08-18 09:20:23 +0000966 def _setup(self, conn):
967 self._conn = conn
968
969 # set up delegation to flesh out interface
970 self.send = conn.send
971 self.putrequest = conn.putrequest
972 self.endheaders = conn.endheaders
973 self.set_debuglevel = conn.set_debuglevel
974
975 conn._http_vsn = self._http_vsn
976 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +0000977
Greg Steindd6eefb2000-07-18 09:09:48 +0000978 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000979
Greg Steindd6eefb2000-07-18 09:09:48 +0000980 def connect(self, host=None, port=None):
981 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +0000982
Greg Steindd6eefb2000-07-18 09:09:48 +0000983 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +0000984 self._conn._set_hostport(host, port)
985 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +0000986
Greg Steindd6eefb2000-07-18 09:09:48 +0000987 def getfile(self):
988 "Provide a getfile, since the superclass' does not use this concept."
989 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +0000990
Greg Steindd6eefb2000-07-18 09:09:48 +0000991 def putheader(self, header, *values):
992 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +0000993 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +0000994
Greg Steindd6eefb2000-07-18 09:09:48 +0000995 def getreply(self):
996 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +0000997
Greg Steindd6eefb2000-07-18 09:09:48 +0000998 Returns a tuple consisting of:
999 - server status code (e.g. '200' if all goes well)
1000 - server "reason" corresponding to status code
1001 - any RFC822 headers in the response from the server
1002 """
1003 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001004 response = self._conn.getresponse()
Greg Steindd6eefb2000-07-18 09:09:48 +00001005 except BadStatusLine, e:
1006 ### hmm. if getresponse() ever closes the socket on a bad request,
1007 ### then we are going to have problems with self.sock
Greg Stein5e0fa402000-06-26 08:28:01 +00001008
Greg Steindd6eefb2000-07-18 09:09:48 +00001009 ### should we keep this behavior? do people use it?
1010 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001011 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +00001012
Greg Steindd6eefb2000-07-18 09:09:48 +00001013 # close our socket -- we want to restart after any protocol error
1014 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001015
Greg Steindd6eefb2000-07-18 09:09:48 +00001016 self.headers = None
1017 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +00001018
Greg Steindd6eefb2000-07-18 09:09:48 +00001019 self.headers = response.msg
1020 self.file = response.fp
1021 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +00001022
Greg Steindd6eefb2000-07-18 09:09:48 +00001023 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001024 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001025
Greg Steindd6eefb2000-07-18 09:09:48 +00001026 # note that self.file == response.fp, which gets closed by the
1027 # superclass. just clear the object ref here.
1028 ### hmm. messy. if status==-1, then self.file is owned by us.
1029 ### well... we aren't explicitly closing, but losing this ref will
1030 ### do it
1031 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001032
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 had no HTTPS class, but the interface it defined for
        sending http requests is just as useful for https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # host gets a default; the X509 key/cert files are passed on

            # a port of 0 is bad input; treat it as "no port given"
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein81937a42001-08-18 09:20:23 +00001058
Greg Stein5e0fa402000-06-26 08:28:01 +00001059
class HTTPException(Exception):
    "Base class of the exceptions defined here."
    # A subclass with its own __init__ has to call Exception.__init__ or
    # set self.args itself; str() on the instance fails otherwise.

class NotConnected(HTTPException):
    pass

class InvalidURL(HTTPException):
    pass

class UnknownProtocol(HTTPException):
    def __init__(self, version):
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    pass

class UnimplementedFileMode(HTTPException):
    pass

class IncompleteRead(HTTPException):
    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    pass

class CannotSendRequest(ImproperConnectionState):
    pass

class CannotSendHeader(ImproperConnectionState):
    pass

class ResponseNotReady(ImproperConnectionState):
    pass

class BadStatusLine(HTTPException):
    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line

# for backwards compatibility
error = HTTPException
1106
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parser calls readline() expecting the HTTP status
    # line.  In a 0.9 response that line is really the first line of the
    # body, so clients need a readable file object that still contains it.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # anything not defined here comes from the wrapped file
        return getattr(self._file, attr)

    def _done(self):
        # Called once the last byte of the line has been handed out.  From
        # then on every read method goes straight to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        "Read `amt` characters (all when None), starting with the saved line."
        assert not self._line_consumed and self._line_left
        if amt is not None and amt <= self._line_left:
            # the request can be served from the saved line alone
            start = self._line_offset
            stop = start + amt
            piece = self._line[start:stop]
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return piece

        # the rest of the line is used up; the remainder comes from the file
        rest = self._line[self._line_offset:]
        self._done()
        if amt is None:
            return rest + self._file.read()
        return rest + self._file.read(amt - len(rest))

    def readline(self):
        "Return what is left of the saved line."
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        "Return the rest of the saved line followed by the file's lines."
        first = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return first + self._file.readlines()
        return first + self._file.readlines(size)
Greg Stein5e0fa402000-06-26 08:28:01 +00001166
Guido van Rossum23acc951994-02-21 16:36:04 +00001167def test():
Guido van Rossum41999c11997-12-09 00:12:23 +00001168 """Test this module.
1169
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001170 A hodge podge of tests collected here, because they have too many
1171 external dependencies for the regular test suite.
Guido van Rossum41999c11997-12-09 00:12:23 +00001172 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001173
Guido van Rossum41999c11997-12-09 00:12:23 +00001174 import sys
1175 import getopt
1176 opts, args = getopt.getopt(sys.argv[1:], 'd')
1177 dl = 0
1178 for o, a in opts:
1179 if o == '-d': dl = dl + 1
1180 host = 'www.python.org'
1181 selector = '/'
1182 if args[0:]: host = args[0]
1183 if args[1:]: selector = args[1]
1184 h = HTTP()
1185 h.set_debuglevel(dl)
1186 h.connect(host)
1187 h.putrequest('GET', selector)
1188 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +00001189 status, reason, headers = h.getreply()
1190 print 'status =', status
1191 print 'reason =', reason
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001192 print "read", len(h.getfile().read())
Guido van Rossum41999c11997-12-09 00:12:23 +00001193 print
1194 if headers:
Guido van Rossum34735a62000-12-15 15:09:42 +00001195 for header in headers.headers: print header.strip()
Guido van Rossum41999c11997-12-09 00:12:23 +00001196 print
Greg Stein5e0fa402000-06-26 08:28:01 +00001197
Jeremy Hylton8acf1e02002-03-08 19:35:51 +00001198 # minimal test that code to extract host from url works
1199 class HTTP11(HTTP):
1200 _http_vsn = 11
1201 _http_vsn_str = 'HTTP/1.1'
1202
1203 h = HTTP11('www.python.org')
1204 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1205 h.endheaders()
1206 h.getreply()
1207 h.close()
1208
Greg Stein5e0fa402000-06-26 08:28:01 +00001209 if hasattr(socket, 'ssl'):
Tim Petersc411dba2002-07-16 21:35:23 +00001210
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001211 for host, selector in (('sourceforge.net', '/projects/python'),
1212 ('dbserv2.theopalgroup.com', '/mediumfile'),
1213 ('dbserv2.theopalgroup.com', '/smallfile'),
1214 ):
1215 print "https://%s%s" % (host, selector)
1216 hs = HTTPS()
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001217 hs.set_debuglevel(dl)
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001218 hs.connect(host)
1219 hs.putrequest('GET', selector)
1220 hs.endheaders()
1221 status, reason, headers = hs.getreply()
1222 print 'status =', status
1223 print 'reason =', reason
1224 print "read", len(hs.getfile().read())
1225 print
1226 if headers:
1227 for header in headers.headers: print header.strip()
1228 print
Guido van Rossum23acc951994-02-21 16:36:04 +00001229
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001230 # Test a buggy server -- returns garbled status line.
1231 # http://www.yahoo.com/promotions/mom_com97/supermom.html
1232 c = HTTPConnection("promotions.yahoo.com")
1233 c.set_debuglevel(1)
1234 c.connect()
1235 c.request("GET", "/promotions/mom_com97/supermom.html")
1236 r = c.getresponse()
1237 print r.status, r.version
1238 lines = r.read().split("\n")
1239 print "\n".join(lines[:5])
1240
1241 c = HTTPConnection("promotions.yahoo.com", strict=1)
1242 c.set_debuglevel(1)
1243 c.connect()
1244 c.request("GET", "/promotions/mom_com97/supermom.html")
1245 try:
1246 r = c.getresponse()
1247 except BadStatusLine, err:
1248 print "strict mode failed as expected"
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001249 print err
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001250 else:
1251 print "XXX strict mode should have failed"
1252
1253 for strict in 0, 1:
1254 h = HTTP(strict=strict)
1255 h.connect("promotions.yahoo.com")
1256 h.putrequest('GET', "/promotions/mom_com97/supermom.html")
1257 h.endheaders()
1258 status, reason, headers = h.getreply()
1259 assert (strict and status == -1) or status == 200, (strict, status)
1260
Guido van Rossum23acc951994-02-21 16:36:04 +00001261if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +00001262 test()