blob: 638a92b039d1eead41edb9b5626ab787a573f182 [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000069import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Thomas Wouters89d996e2007-09-08 17:39:28 +000073import warnings
Guido van Rossum23acc951994-02-21 16:36:04 +000074
# Public names exported by a star-import of this module.
__all__ = [
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
    "responses",
]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
# Default TCP ports for the http and https schemes.
HTTP_PORT, HTTPS_PORT = 80, 443

# Placeholder stored in response attributes until they have been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (the values of HTTPConnection's private __state; the
# module docstring maps them onto the logical request/response states)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
91
# Symbolic names for HTTP status codes, grouped by class.

# 1xx: informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# 2xx: successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# 3xx: redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# 4xx: client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# 5xx: server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
151
# Mapping status codes to their standard reason phrases.
#
# Every status code that this module exposes as a named constant has an
# entry here (including the extension codes 102, 207, 226, 422-426, 507
# and 510), so looking up the phrase for any of those constants succeeds.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}
200
# Upper bound, in bytes, on a single read issued by _safe_read (1 MiB).
MAXAMOUNT = 1024 * 1024
203
class HTTPMessage(mimetools.Message):
    """Header container that merges repeated header fields.

    Parsed values live in ``self.dict``; the raw header lines are kept in
    ``self.headers``.
    """

    def addheader(self, key, value):
        """Store value under key, comma-joining it onto any earlier value."""
        existing = self.dict.get(key)
        self.dict[key] = (value if existing is None
                          else ", ".join((existing, value)))

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the field named key."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Parse header lines from self.fp into self.dict and self.headers.

        Lines are read until the blank line that terminates the headers;
        that terminating line is consumed and not recorded.  When a line
        that is not a header is met instead, parsing stops and an attempt
        is made to push the line back (via fp.unread(), or by seeking to
        where the line started); the line is never recorded.

        On return self.status is the empty string on success and an error
        message otherwise, and self.headers holds the uninterpreted header
        lines in the order they were read.

        Repeated header fields are combined as described in RFC 2616
        sec 4.2: each later field-value is appended to the first,
        separated by a comma, preserving the order of arrival.
        """
        # XXX This is a copy of the readheaders() method of rfc822.Message
        # with a few small changes; the base class design does not allow
        # the behaviour to be customised any other way.

        self.dict = {}
        self.unixfrom = ''
        self.status = ''
        self.headers = raw_lines = []

        current_field = ""
        before_first_header = True
        line_start = None
        push_back = None
        get_position = None

        # Pick a way to undo the read of a non-header line, if there is one.
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            get_position = self.fp.tell

        while True:
            if get_position:
                try:
                    line_start = get_position()
                except IOError:
                    # The file turned out not to be seekable after all.
                    line_start = get_position = None
                    self.seekable = 0

            line = str(self.fp.readline(), "iso-8859-1")
            if not line:
                self.status = 'EOF in headers'
                break

            # Skip unix From name time lines
            if before_first_header and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            before_first_header = False

            if current_field and line.startswith((' ', '\t')):
                # Continuation of the previous header.
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                raw_lines.append(line)
                self.addcontinue(current_field, line.strip())
                continue
            if self.iscomment(line):
                # Comments are dropped.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break

            current_field = self.isheader(line)
            if current_field:
                # A legal header line: record it raw and parsed.
                raw_lines.append(line)
                field_value = line[len(current_field)+1:].strip()
                self.addheader(current_field, field_value)
                continue

            # Not a header line: report it, try to undo the read, and stop.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            if push_back:
                push_back(line)
            elif get_position:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
class HTTPResponse:
    """One HTTP response read from a socket.

    begin() parses the status line and headers; read() then returns the
    body, honouring chunked transfer encoding and Content-Length.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap sock in an unbuffered binary file; nothing is read yet.

        debuglevel > 0 prints the status line and headers as they are read.
        strict selects BadStatusLine over the HTTP/0.9 fallback.
        method is the request method; "HEAD" implies an empty body.
        """
        # XXX If the response includes a content-length header, we
        # need to make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  (This only
        # applies to HTTP/1.1 connections.)  Since some clients access
        # self.fp directly rather than calling read(), this is a little
        # tricky.
        self.fp = sock.makefile("rb", 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line.

        Returns a (version, status, reason) tuple with status as an int.
        Raises BadStatusLine on an empty line, on a non-HTTP line in
        strict mode, or on a status code that is not a number in 100..999.
        In non-strict mode a non-HTTP first line is treated as the start
        of an HTTP/0.9 body and ("HTTP/0.9", 200, "") is returned.
        """
        line = str(self.fp.readline(), "iso-8859-1")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith("HTTP/"):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # Assume it's a Simple-Response from an 0.9 server.
                # The line already consumed is body data, so push it back
                # in front of the stream.  It was decoded as iso-8859-1
                # and must be re-encoded the same way: encoding as ASCII
                # would raise UnicodeEncodeError on any byte >= 0x80.
                self.fp = LineAndFileWrapper(line.encode("iso-8859-1"),
                                             self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body bookkeeping.

        Does nothing if the headers have already been read.  Sets status,
        reason, version, msg, chunked, length and will_close.  Raises
        UnknownProtocol for an unrecognised HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.status = status
        self.reason = reason.strip()
        if version == "HTTP/1.0":
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == "HTTP/0.9":
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: the body runs to EOF.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(io.BytesIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print("header:", hdr, end=" ")

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # An unparsable Content-Length is treated as absent.
                pass

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = 1

    def _check_close(self):
        """Return True if the server will close the connection afterwards."""
        conn = self.msg.getheader("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file, if still open, and forget it."""
        if self.fp:
            self.fp.close()
            self.fp = None

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    @property
    def closed(self):
        """True once the underlying file has been released."""
        return self.isclosed()

    def flush(self):
        """Flush the underlying file; a no-op once the response is closed."""
        # After close() self.fp is None, so an unguarded call would raise
        # AttributeError.
        if self.fp:
            self.fp.flush()

    # End of "raw stream" methods

    def isclosed(self):
        """Return True if the underlying file has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to amt bytes of the body, or all of it.

        Returns b"" once the response is closed.  With amt=None the rest
        of the body is read and the response is closed.  Otherwise the
        read is clipped to the remaining Content-Length when one is known,
        and the response is closed when that count reaches zero.
        """
        if self.fp is None:
            return b""

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read up to amt bytes (all if None) of a chunked-encoded body.

        The position inside a partially consumed chunk is remembered in
        self.chunk_left between calls.  After the terminating zero-size
        chunk any trailer is discarded and the response is closed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Pieces are collected in a list and joined once on return, which
        # avoids quadratic cost from repeated bytes concatenation.
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b";")
                if i >= 0:
                    line = line[:i]  # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b"".join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b"".join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == b"\r\n":
                break

        # we read everything; close the "file"
        self.close()

        return b"".join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem; it
        carries the bytes that were read before the stream ended.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                # Hand over the partial data as bytes, not as the internal
                # list of chunks.
                raise IncompleteRead(b"".join(s))
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def getheader(self, name, default=None):
        """Return the value of header name, or default if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000643
Greg Stein5e0fa402000-06-26 08:28:01 +0000644
645class HTTPConnection:
646
Greg Steindd6eefb2000-07-18 09:09:48 +0000647 _http_vsn = 11
648 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000649
Greg Steindd6eefb2000-07-18 09:09:48 +0000650 response_class = HTTPResponse
651 default_port = HTTP_PORT
652 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000653 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000654 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
Guido van Rossumd8faa362007-04-27 19:54:29 +0000656 def __init__(self, host, port=None, strict=None, timeout=None):
657 self.timeout = timeout
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000659 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000660 self.__response = None
661 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000662 self._method = None
Tim Petersc411dba2002-07-16 21:35:23 +0000663
Greg Steindd6eefb2000-07-18 09:09:48 +0000664 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000665 if strict is not None:
666 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000667
Greg Steindd6eefb2000-07-18 09:09:48 +0000668 def _set_hostport(self, host, port):
669 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000670 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000671 j = host.rfind(']') # ipv6 addresses have [...]
672 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000673 try:
674 port = int(host[i+1:])
675 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000676 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 host = host[:i]
678 else:
679 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000680 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000681 host = host[1:-1]
Greg Steindd6eefb2000-07-18 09:09:48 +0000682 self.host = host
683 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000684
Jeremy Hylton30f86742000-09-18 22:50:38 +0000685 def set_debuglevel(self, level):
686 self.debuglevel = level
687
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 def connect(self):
689 """Connect to the host and port specified in __init__."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000690 self.sock = socket.create_connection((self.host,self.port),
691 self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000692
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 def close(self):
694 """Close the connection to the HTTP server."""
695 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000696 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000697 self.sock = None
698 if self.__response:
699 self.__response.close()
700 self.__response = None
701 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000702
def send(self, str):
    """Send `str' to the server.

    If `str' has a read() method it is drained in 8192-byte blocks, each
    passed to self.sock.sendall(); any other object is handed to
    sendall() unchanged.

    When no socket is open, connect() is called first if self.auto_open
    is true; otherwise NotConnected is raised.  A socket.error raised
    while sending is always propagated to the caller.
    """
    import errno  # local import: the file's import block is left untouched

    if self.sock is None:
        if not self.auto_open:
            raise NotConnected()
        self.connect()

    if self.debuglevel > 0:
        print("send:", repr(str))

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    blocksize = 8192
    try:
        if hasattr(str, 'read'):
            if self.debuglevel > 0:
                print("sendIng a read()able")
            data = str.read(blocksize)
            while data:
                self.sock.sendall(data)
                data = str.read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        # An error constructed without arguments has empty args; it is
        # not a broken pipe and must not be masked by an IndexError.
        if v.args and v.args[0] == errno.EPIPE:
            self.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000732
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000733 def _output(self, s):
734 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000735
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000736 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000737 """
738 self._buffer.append(s)
739
740 def _send_output(self):
741 """Send the currently buffered request and clear the buffer.
742
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000743 Appends an extra \\r\\n to the buffer.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000744 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000745 self._buffer.extend((b"", b""))
746 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000747 del self._buffer[:]
748 self.send(msg)
749
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    Raises CannotSendRequest if the connection is not idle.  The
    automatic headers are only issued when self._http_vsn is 11.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state == _CS_IDLE:
        self.__state = _CS_REQ_STARTED
    else:
        raise CannotSendRequest()

    # Save the method we use, we need it later in the response phase
    self._method = method
    if not url:
        url = '/'
    request = '%s %s %s' % (method, url, self._http_vsn_str)

    # Non-ASCII characters should have been eliminated earlier
    self._output(request.encode('ascii'))

    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy (the
            # URL is absolute), use the host of the actual URL, not
            # the host of the proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url)[1]

            if netloc:
                try:
                    netloc_enc = netloc.encode("ascii")
                except UnicodeEncodeError:
                    netloc_enc = netloc.encode("idna")
                self.putheader('Host', netloc_enc)
            else:
                try:
                    host_enc = self.host.encode("ascii")
                except UnicodeEncodeError:
                    host_enc = self.host.encode("idna")
                # Compare with the default port of *this* class, not a
                # fixed HTTP port: a subclass with another default
                # (e.g. HTTPS) must omit the port exactly when it is
                # that subclass's default.
                if self.port == self.default_port:
                    self.putheader('Host', host_enc)
                else:
                    host_enc = host_enc.decode("ascii")
                    self.putheader('Host', "%s:%s" % (host_enc, self.port))

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000858
def putheader(self, header, value):
    """Send a request header line to the server.

    For example: h.putheader('Accept', 'text/html')

    `header' and `value' may be str (encoded as ASCII) or bytes (used
    as given).  `value' may also be an int, which is sent in decimal
    form, so h.putheader('Content-Length', 42) works.

    Raises CannotSendHeader unless a request has been started with
    putrequest() and its headers have not been ended yet.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')
    if isinstance(value, int):
        # bytes + int would raise TypeError below; send the decimal text
        value = str(value)
    if hasattr(value, 'encode'):
        value = value.encode('ascii')
    self._output(header + b': ' + value)
Greg Stein5e0fa402000-06-26 08:28:01 +0000873
def endheaders(self):
    """Indicate that the last header line has been sent to the server.

    Moves the connection from the request-started to the request-sent
    state and flushes the buffered request via _send_output().  Raises
    CannotSendHeader when no request is in the started state.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT

    self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000883
def request(self, method, url, body=None, headers=None):
    """Send a complete request to the server.

    `headers' is a mapping of extra header names to values; omitting it
    (or passing None) means no extra headers.

    If sending fails with a broken pipe (EPIPE) and self.auto_open is
    true, the request is sent one more time; send() has closed the
    connection in that case, so the retry reconnects.  Any other
    socket.error is propagated unchanged.
    """
    import errno  # local import: the file's import block is left untouched

    if headers is None:
        headers = {}
    try:
        self._send_request(method, url, body, headers)
    except socket.error as v:
        # trap 'Broken pipe' if we're allowed to automatically reconnect
        broken_pipe = bool(v.args) and v.args[0] == errno.EPIPE
        if not (broken_pipe and self.auto_open):
            raise
        # try one more time
        self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000894
Greg Steindd6eefb2000-07-18 09:09:48 +0000895 def _send_request(self, method, url, body, headers):
Jeremy Hylton2c178252004-08-07 16:28:14 +0000896 # honour explicitly requested Host: and Accept-Encoding headers
897 header_names = dict.fromkeys([k.lower() for k in headers])
898 skips = {}
899 if 'host' in header_names:
900 skips['skip_host'] = 1
901 if 'accept-encoding' in header_names:
902 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000903
Jeremy Hylton2c178252004-08-07 16:28:14 +0000904 self.putrequest(method, url, **skips)
905
906 if body and ('content-length' not in header_names):
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000907 thelen = None
Thomas Wouters89f507f2006-12-13 04:49:30 +0000908 try:
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000909 thelen = str(len(body))
Guido van Rossumb940e112007-01-10 16:19:56 +0000910 except TypeError as te:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000911 # If this is a file-like object, try to
912 # fstat its file descriptor
913 import os
914 try:
915 thelen = str(os.fstat(body.fileno()).st_size)
916 except (AttributeError, OSError):
917 # Don't send a length if this failed
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000918 if self.debuglevel > 0: print("Cannot stat!!")
Thomas Wouters9fe394c2007-02-05 01:24:16 +0000919
Thomas Wouters89f507f2006-12-13 04:49:30 +0000920 if thelen is not None:
921 self.putheader('Content-Length',thelen)
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000922 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +0000923 self.putheader(hdr, value)
924 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000925
Greg Steindd6eefb2000-07-18 09:09:48 +0000926 if body:
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000927 if isinstance(body, str): body = body.encode('ascii')
Greg Steindd6eefb2000-07-18 09:09:48 +0000928 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000929
def getresponse(self):
    """Get the response from the server.

    Builds a self.response_class object on the current socket, lets it
    read the response head via begin() and returns it.  Raises
    ResponseNotReady unless a request has been fully sent and no
    earlier, still-unfinished response is attached to the connection.
    """

    # a prior response that has been completed can be forgotten.
    previous = self.__response
    if previous and previous.isclosed():
        self.__response = None

    #
    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    ready = self.__state == _CS_REQ_SENT and not self.__response
    if not ready:
        raise ResponseNotReady()

    # the debug level is only passed positionally when debugging is on
    extra = (self.debuglevel,) if self.debuglevel > 0 else ()
    response = self.response_class(self.sock, *extra,
                                   strict=self.strict,
                                   method=self._method)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if not response.will_close:
        # remember this, so we can tell when it is complete
        self.__response = response
    else:
        # this effectively passes the connection to the response
        self.close()

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000976
try:
    import ssl
except ImportError:
    # Without the ssl module nothing below is defined or exported.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant that talks to the server over SSL."""

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None):
            """Store the key/certificate file names next to the base state."""
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            """Connect to a host on a given (SSL) port.

            A plain connection is opened first and then wrapped with
            ssl.wrap_socket(); the wrapped socket becomes self.sock.
            """
            plain = socket.create_connection((self.host, self.port),
                                             self.timeout)
            self.sock = ssl.wrap_socket(plain, self.key_file, self.cert_file)

    def FakeSocket(sock, sslobj):
        """Deprecated shim: warn, then return `sslobj' unchanged."""
        message = ("FakeSocket is deprecated, and won't be in 3.x. " +
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001007
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +00001012
class NotConnected(HTTPException):
    """Raised by send() when no socket is open and auto_open is false."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001015
class InvalidURL(HTTPException):
    """Raised for an unusable host string, e.g. a nonnumeric port."""
1018
class UnknownProtocol(HTTPException):
    """Exception carrying a protocol version.

    The value is available as `.version' and as the only item of
    `.args'.
    """

    def __init__(self, version):
        HTTPException.__init__(self, version)  # args becomes (version,)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +00001023
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass without extra state."""
    # NOTE(review): the raise sites are outside this section; presumably
    # signals a Transfer-Encoding the library cannot handle -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001026
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass without extra state."""
    # NOTE(review): the raise sites are outside this section; presumably
    # signals an unsupported file mode -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001029
class IncompleteRead(HTTPException):
    """Exception carrying partially read data.

    The value is available as `.partial' and as the only item of
    `.args'.
    """

    def __init__(self, partial):
        HTTPException.__init__(self, partial)  # args becomes (partial,)
        self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +00001034
class ImproperConnectionState(HTTPException):
    """Base class for errors about calls made in the wrong connection state."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001037
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001040
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request has been started."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001043
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001046
class BadStatusLine(HTTPException):
    """Exception carrying a status line.

    The value is available as `.line' and as the only item of `.args'.
    """

    def __init__(self, line):
        HTTPException.__init__(self, line)  # args becomes (line,)
        self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +00001051
# Backwards-compatible alias: code that catches `error' catches
# HTTPException and therefore every exception derived from it.
error = HTTPException
1054
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Serves an already-read first line ahead of the remaining content of
    an underlying file object.
    """

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Wrap `file' so that `line' is returned before its content."""
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # anything not defined here is looked up on the wrapped file
        return getattr(self._file, attr)

    def _done(self):
        # Called when the last byte is read from the line.  From then on
        # the read methods are the underlying file's own bound methods,
        # installed as instance attributes that shadow the ones below.
        self._line_consumed = 1
        for name in ("read", "readline", "readlines"):
            setattr(self, name, getattr(self._file, name))

    def read(self, amt=None):
        """Read up to `amt' items (everything if None), saved line first."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        wants_past_line = amt is None or amt > self._line_left
        if not wants_past_line:
            # the request is satisfied from the saved line alone
            assert amt <= self._line_left
            start = self._line_offset
            stop = start + amt
            chunk = self._line[start:stop]
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return chunk
        # hand out the rest of the line, then continue from the file
        rest = self._line[self._line_offset:]
        self._done()
        if amt is None:
            return rest + self._file.read()
        return rest + self._file.read(amt - len(rest))

    def readline(self):
        """Return the unread part of the saved line, later the file's lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        """Return the unread part of the saved line plus the file's lines."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        lines = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return lines + self._file.readlines()
        return lines + self._file.readlines(size)