blob: de27c17b4cff261b3f914b75ac64d167d477798e [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body
47
Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.
58
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000069import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Thomas Wouters89d996e2007-09-08 17:39:28 +000073import warnings
Guido van Rossum23acc951994-02-21 16:36:04 +000074
# Public names exported by a star-import of this module.  Several of them
# (the exception classes and "error") are defined outside this section.
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
# default ports used when the caller does not give one
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder for HTTPResponse attributes that have not been parsed yet
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.
# Every status constant defined above has an entry here, including the
# extension codes (102, 207, 226, 422-426, 507, 510), so that
# responses[<constant>] never raises KeyError.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
203
class HTTPMessage(mimetools.Message):
    """mimetools.Message variant that merges repeated HTTP header fields.

    The header dictionary (self.dict) holds one value per field name;
    repeated fields are joined with ", " and continuation lines are
    appended to the value of the field they continue.
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            # RFC 2616 sec 4.2: repeated fields combine into a single
            # comma-separated value, in the order received.
            self.dict[key] = ", ".join((prev, value))

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick a way to undo the read of a stray non-header line: prefer
        # an explicit unread() hook, else remember positions with tell().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # The stream turned out not to be seekable after all.
                    startofline = tell = None
                    self.seekable = 0
            # Keep the undecoded bytes around: they are what must be
            # handed back to the stream if the line has to be un-read.
            raw = self.fp.readline()
            line = str(raw, "iso-8859-1")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    # Push back exactly what readline() returned (bytes),
                    # not the decoded text.
                    # NOTE(review): assumes fp.unread accepts the same
                    # type readline() produces -- confirm against callers.
                    unread(raw)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
class HTTPResponse:
    """A single HTTP response read from a socket.

    begin() parses the status line and the headers; read() then returns
    body bytes, taking chunked transfer-encoding and Content-Length into
    account.  The underlying file object is self.fp; it is set to None
    once the response is closed.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap `sock` in an unbuffered binary file; nothing is read yet.

        `method` is the request method; begin() uses it to treat the
        response to a HEAD request as having an empty body.
        """
        # XXX If the response includes a content-length header, we
        # need to make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  (This only
        # applies to HTTP/1.1 connections.)  Since some clients access
        # self.fp directly rather than calling read(), this is a little
        # tricky.
        self.fp = sock.makefile("rb", 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN         # HTTP-Version
        self.status = _UNKNOWN          # Status-Code
        self.reason = _UNKNOWN          # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read one status line and return (version, status, reason).

        Raises BadStatusLine when nothing could be read, when the status
        code is not an integer in 100..999, or (strict mode only) when
        the line does not start with "HTTP/".  In non-strict mode such a
        line is pushed back in front of self.fp and the response is
        reported as ("HTTP/0.9", 200, "").
        """
        # Initialize with Simple-Response defaults.
        line = str(self.fp.readline(), "iso-8859-1")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith("HTTP/"):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # Assume it's a Simple-Response from an 0.9 server.
                # We have to convert the first line back to raw bytes
                # because self.fp.readline() needs to return bytes.
                # The line was decoded as iso-8859-1 above, so it must be
                # encoded the same way: an "ascii" round trip would raise
                # UnicodeEncodeError for any byte >= 0x80 in the body.
                self.fp = LineAndFileWrapper(bytes(line, "iso-8859-1"),
                                             self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Parse the status line and headers; set up body bookkeeping.

        Does nothing if the headers were already read.  Sets status,
        reason, version, msg, chunked, will_close and length.  Raises
        UnknownProtocol for an unrecognised HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.status = status
        self.reason = reason.strip()
        if version == "HTTP/1.0":
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == "HTTP/0.9":
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: body runs to EOF.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(io.BytesIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print("header:", hdr, end=" ")

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = 1

    def _check_close(self):
        """Return True if the connection will close after this response."""
        conn = self.msg.getheader("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file object, if any, and forget it."""
        if self.fp:
            self.fp.close()
            self.fp = None

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    @property
    def closed(self):
        return self.isclosed()

    def flush(self):
        """Flush the underlying file object; a no-op once closed."""
        # self.fp is None after close(); flushing a closed response must
        # not blow up with AttributeError.
        if self.fp is not None:
            self.fp.flush()

    # End of "raw stream" methods

    def isclosed(self):
        """Return True once the underlying file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to `amt` body bytes (all of them if None).

        Returns b"" once the response is closed.  The response closes
        itself when the whole body has been read or when a bounded read
        hits end-of-file.
        """
        if self.fp is None:
            return b""

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if not s and amt:
            # Asked for data and got none: the peer is done.  Release the
            # file so isclosed() reports it.  No exception is raised even
            # if a Content-Length was not satisfied, to stay compatible
            # with existing callers.
            self.close()
            return s
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read up to `amt` bytes (or everything) from a chunked body.

        Raises IncompleteRead, after closing the response, when a chunk
        size line cannot be parsed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Collect the pieces and join once at the end instead of growing
        # a bytes object with repeated concatenation.
        pieces = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b";")
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(b"".join(pieces))
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b"".join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b"".join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == b"\r\n":
                break

        # we read everything; close the "file"
        self.close()

        return b"".join(pieces)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                # Hand over the partial data as bytes, the same type
                # _read_chunked() passes to IncompleteRead.
                raise IncompleteRead(b"".join(s))
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def getheader(self, name, default=None):
        """Return header `name`, or `default`; needs begin() to have run."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000654
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
656class HTTPConnection:
657
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 _http_vsn = 11
659 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000660
Greg Steindd6eefb2000-07-18 09:09:48 +0000661 response_class = HTTPResponse
662 default_port = HTTP_PORT
663 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000664 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000665 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
Guido van Rossumd8faa362007-04-27 19:54:29 +0000667 def __init__(self, host, port=None, strict=None, timeout=None):
668 self.timeout = timeout
Greg Steindd6eefb2000-07-18 09:09:48 +0000669 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000670 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000671 self.__response = None
672 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000673 self._method = None
Tim Petersc411dba2002-07-16 21:35:23 +0000674
Greg Steindd6eefb2000-07-18 09:09:48 +0000675 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000676 if strict is not None:
677 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000678
Greg Steindd6eefb2000-07-18 09:09:48 +0000679 def _set_hostport(self, host, port):
680 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000681 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000682 j = host.rfind(']') # ipv6 addresses have [...]
683 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000684 try:
685 port = int(host[i+1:])
686 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000687 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 host = host[:i]
689 else:
690 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000691 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000692 host = host[1:-1]
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 self.host = host
694 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000695
Jeremy Hylton30f86742000-09-18 22:50:38 +0000696 def set_debuglevel(self, level):
697 self.debuglevel = level
698
Greg Steindd6eefb2000-07-18 09:09:48 +0000699 def connect(self):
700 """Connect to the host and port specified in __init__."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000701 self.sock = socket.create_connection((self.host,self.port),
702 self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000703
Greg Steindd6eefb2000-07-18 09:09:48 +0000704 def close(self):
705 """Close the connection to the HTTP server."""
706 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000707 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000708 self.sock = None
709 if self.__response:
710 self.__response.close()
711 self.__response = None
712 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000713
def send(self, str):
    """Send `str' to the server.

    `str' is either data accepted by the socket's sendall(), or an object
    with a read() method, in which case it is streamed in 8192-byte
    blocks until read() returns something false.  (The parameter name
    shadows the builtin; it is kept for backwards compatibility.)

    If there is no socket yet, one is opened via connect() when
    self.auto_open is true; otherwise NotConnected is raised.

    Any socket.error is propagated to the caller.  On a broken pipe
    (errno.EPIPE) the connection is closed first, so that a later send
    can reconnect.
    """
    # Function-scope import: the module's import block is not part of
    # this method.
    import errno

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        if hasattr(str, 'read'):
            if self.debuglevel > 0:
                print("sendIng a read()able")
            data = str.read(blocksize)
            while data:
                self.sock.sendall(data)
                data = str.read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        # Guard against argument-less errors: indexing an empty args
        # tuple would raise IndexError and hide the real socket error.
        if v.args and v.args[0] == errno.EPIPE:
            self.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000743
def _output(self, s):
    """Queue one line of the request that is being assembled.

    `s' must not carry its own trailing CRLF; line terminators are
    inserted when the buffer is joined by _send_output().
    """
    pending = self._buffer
    pending.append(s)
750
def _send_output(self):
    """Flush the buffered request lines to the server and empty the buffer.

    The queued lines are joined with CRLF; two empty entries are added
    first so the message ends with the blank line that terminates the
    header section.
    """
    pending = self._buffer
    pending.extend((b"", b""))
    message = b"\r\n".join(pending)
    # Empty the list in place so self._buffer stays the same object.
    del pending[:]
    self.send(message)
760
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Start a new request: queue its request line and default headers.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html';
    a false value is sent as '/'.
    `skip_host' if true suppresses the automatic 'Host:' header.
    `skip_accept_encoding' if true suppresses the automatic
    'Accept-Encoding:' header.

    Raises CannotSendRequest unless the connection is idle.  The lines
    are queued through _output() and putheader().
    """
    # if a prior response has been completed, then forget about it.
    prior = self.__response
    if prior and prior.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    if self.__state != _CS_IDLE:
        raise CannotSendRequest()
    self.__state = _CS_REQ_STARTED

    # Save the method we use, we need it later in the response phase
    self._method = method
    url = url or '/'
    request_line = '%s %s %s' % (method, url, self._http_vsn_str)

    # Non-ASCII characters should have been eliminated earlier
    self._output(request_line.encode('ascii'))

    if self._http_vsn != 11:
        # For HTTP/1.0, the server will assume "not chunked"
        return

    # Issue some standard headers for better HTTP/1.1 compliance

    if not skip_host:
        # this header is issued *only* for HTTP/1.1 connections. more
        # specifically, this means it is only issued when the client
        # uses the new HTTPConnection() class. backwards-compat clients
        # will be using HTTP/1.0 and those clients may be issuing this
        # header themselves. we should NOT issue it twice; some web
        # servers (such as Apache) barf when they see two Host: headers

        def wire_name(name):
            # ASCII when possible, IDNA otherwise.
            try:
                return name.encode("ascii")
            except UnicodeEncodeError:
                return name.encode("idna")

        # If we need a non-standard port, include it in the header.
        # If the request is going through a proxy, use the host of the
        # actual URL, not the host of the proxy.
        netloc = ''
        if url.startswith('http'):
            netloc = urlsplit(url)[1]

        if netloc:
            self.putheader('Host', wire_name(netloc))
        else:
            host_bytes = wire_name(self.host)
            if self.port == HTTP_PORT:
                self.putheader('Host', host_bytes)
            else:
                host_text = host_bytes.decode("ascii")
                self.putheader('Host', "%s:%s" % (host_text, self.port))

    # note: we are assuming that clients will not attempt to set these
    #       headers since *this* library must deal with the
    #       consequences. this also means that when the supporting
    #       libraries are updated to recognize other forms, then this
    #       code should be changed (removed or updated).

    # we only want a Content-Encoding of "identity" since we don't
    # support encodings such as x-gzip or x-deflate.
    if not skip_accept_encoding:
        self.putheader('Accept-Encoding', 'identity')

    # we can accept "chunked" Transfer-Encodings, but no others
    # NOTE: no TE header implies *only* "chunked"
    #self.putheader('TE', 'chunked')

    # if TE is supplied in the header, then it must appear in a
    # Connection header.
    #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000869
def putheader(self, header, value):
    """Queue one request header line.

    For example: h.putheader('Accept', 'text/html')

    `header' and `value' may each be text (anything with an encode()
    method, which is encoded as ASCII) or already-encoded bytes.

    Raises CannotSendHeader unless a request has been started and its
    headers have not yet been ended.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    name, content = header, value
    if hasattr(name, 'encode'):
        name = name.encode('ascii')
    if hasattr(content, 'encode'):
        content = content.encode('ascii')
    self._output(name + b': ' + content)
Greg Stein5e0fa402000-06-26 08:28:01 +0000884
def endheaders(self):
    """Indicate that the last header line has been sent to the server.

    Moves the connection from "request started" to "request sent" and
    flushes the buffered request via _send_output().  Raises
    CannotSendHeader when no request is in progress.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT

    self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000894
def request(self, method, url, body=None, headers={}):
    """Send a complete request to the server.

    `method', `url', `body' and `headers' are passed straight to
    _send_request().  If that fails with a broken pipe (errno.EPIPE)
    and self.auto_open is true, the request is attempted exactly one
    more time; every other socket.error is propagated unchanged.

    The shared `headers' default is only read, never mutated, so it is
    kept as-is for interface compatibility.
    """
    # Function-scope import: the module's import block is not part of
    # this method.
    import errno

    try:
        self._send_request(method, url, body, headers)
    except socket.error as v:
        # trap 'Broken pipe' if we're allowed to automatically reconnect.
        # Check args first: indexing an empty args tuple would raise
        # IndexError and hide the real socket error.
        broken_pipe = bool(v.args) and v.args[0] == errno.EPIPE
        if not broken_pipe or not self.auto_open:
            raise
        # try one more time
        self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000905
def _send_request(self, method, url, body, headers):
    """Emit one full request: request line, headers, then optional body.

    Explicit 'Host' and 'Accept-Encoding' entries in `headers' (matched
    case-insensitively) suppress the automatic ones from putrequest().
    When `body' is true and no Content-Length was supplied, one is
    derived from len(body) or, failing that, from fstat() on
    body.fileno(); if neither works the header is left out.  A text
    body is encoded as ASCII before being sent.
    """
    # honour explicitly requested Host: and Accept-Encoding headers
    lowered = {name.lower() for name in headers}
    options = {}
    if 'host' in lowered:
        options['skip_host'] = 1
    if 'accept-encoding' in lowered:
        options['skip_accept_encoding'] = 1

    self.putrequest(method, url, **options)

    if body and 'content-length' not in lowered:
        size = None
        try:
            size = str(len(body))
        except TypeError:
            # If this is a file-like object, try to
            # fstat its file descriptor
            import os
            try:
                size = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # Don't send a length if this failed
                if self.debuglevel > 0:
                    print("Cannot stat!!")

        if size is not None:
            self.putheader('Content-Length', size)

    for hdr, value in headers.items():
        self.putheader(hdr, value)
    self.endheaders()

    if body:
        if isinstance(body, str):
            body = body.encode('ascii')
        self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000940
def getresponse(self):
    """Get the response from the server.

    Builds a self.response_class object on the current socket, calls
    its begin() and returns it.  Raises ResponseNotReady unless a
    request has been fully sent and no earlier response is still
    outstanding.
    """
    # if a prior response has been completed, then forget about it.
    earlier = self.__response
    if earlier and earlier.isclosed():
        self.__response = None

    #
    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    # The debug level is only passed along when debugging is enabled.
    options = {'strict': self.strict, 'method': self._method}
    if self.debuglevel > 0:
        response = self.response_class(self.sock, self.debuglevel, **options)
    else:
        response = self.response_class(self.sock, **options)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if response.will_close:
        # this effectively passes the connection to the response
        self.close()
    else:
        # remember this, so we can tell when it is complete
        self.__response = response

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000987
# HTTPS support is optional: HTTPSConnection and FakeSocket are only defined
# (and HTTPSConnection only exported via __all__) when the ssl module can be
# imported.
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None):
            # host, port, strict and timeout go to the base class; the
            # key/certificate file names are kept for connect().
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            # Open a plain TCP connection first, then wrap it with SSL
            # using the stored key and certificate files.
            sock = socket.create_connection((self.host, self.port), self.timeout)
            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)


    def FakeSocket (sock, sslobj):
        # Deprecated shim: emits a DeprecationWarning and returns `sslobj'
        # unchanged; `sock' is not used.
        warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " +
                      "Use the result of ssl.wrap_socket() directly instead.",
                      DeprecationWarning, stacklevel=2)
        return sslobj

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001018
class HTTPException(Exception):
    """Base class of every exception defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """Raised by send() when there is no socket and auto_open is off."""


class InvalidURL(HTTPException):
    """Raised when a host string carries a non-numeric port."""


class UnknownProtocol(HTTPException):
    """Carries the offending value in both `args' and `version'."""

    def __init__(self, version):
        self.version = version
        self.args = (version,)


class UnknownTransferEncoding(HTTPException):
    """Plain HTTPException subclass (raise site is outside this section)."""


class UnimplementedFileMode(HTTPException):
    """Plain HTTPException subclass (raise site is outside this section)."""


class IncompleteRead(HTTPException):
    """Carries the value it was built with in both `args' and `partial'."""

    def __init__(self, partial):
        self.partial = partial
        self.args = (partial,)


class ImproperConnectionState(HTTPException):
    """Base class for errors caused by calling methods in the wrong state."""


class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""


class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request is in progress."""


class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""


class BadStatusLine(HTTPException):
    """Carries the offending line in both `args' and `line'."""

    def __init__(self, line):
        self.line = line
        self.args = (line,)


# for backwards compatibility
error = HTTPException
1065
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets
    the HTTP status line.  For a 0.9 response, however, this is actually
    the first line of the body!  Clients need to get a readable file
    object that contains that line.

    `line' is the data that was already read; `file' is the underlying
    file object.  Reads are served from `line' first and from `file'
    once the line is used up.  Attributes not defined here are looked up
    on `file'.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Only called for attributes not found normally: delegate them.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' items, or everything when `amt' is None.

        A negative `amt' is treated like None (read everything).  Without
        this, a negative value was used as a slice bound on the pending
        line, which returned truncated data and corrupted the bookkeeping
        for later reads.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        if amt is not None and amt < 0:
            amt = None
        if amt is None or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the pending line, then lines from the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the pending line followed by the file's lines.

        `size', when given, is passed to the underlying readlines() only;
        the pending line is not counted against it.
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)