blob: c6e40e1cbb8d1458bb3242af795aa7b42582c3ea [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000069import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000070import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000071import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000072from urlparse import urlsplit
Thomas Wouters89d996e2007-09-08 17:39:28 +000073import warnings
Guido van Rossum23acc951994-02-21 16:36:04 +000074
# Names exported by "from <this module> import *".
# NOTE(review): the exception classes and "error" listed here are not defined
# in the portion of the file shown alongside this list -- presumably they are
# defined further down; confirm before trimming or extending the list.
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
# Default TCP ports. HTTP_PORT is what HTTPConnection.default_port is set to.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, chunk_left, length, will_close) by HTTPResponse.__init__, i.e.
# before anything has been parsed from the response.
_UNKNOWN = 'UNKNOWN'

# connection states (values held in HTTPConnection.__state; the module
# docstring maps each logical state to a (__state, __response) pair)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
151
# Mapping status codes to their official reason phrases.
#
# Every status-code constant defined in this module has an entry, including
# the extension codes (102, 207, 226, 422-426, 507, 510) that are not part of
# the core HTTP/1.1 list.  306 is reserved/unused and has no constant.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}
200
# Maximum number of bytes requested from the underlying file object in a
# single read() call by HTTPResponse._safe_read (1048576 bytes = 1 MiB);
# larger requests are satisfied by looping.
MAXAMOUNT = 1048576
203
class HTTPMessage(mimetools.Message):
    """Header container used for HTTP responses.

    Overrides header parsing so that repeated header fields are merged into
    one comma-separated value (see readheaders) instead of the base-class
    handling.
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            # Repeated field: append to the existing value, comma-separated.
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick a way to push a bad line back: fp.unread() if the file object
        # offers it, otherwise tell()/seek() when the file is seekable.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: stop treating the file as seekable.
                    startofline = tell = None
                    self.seekable = 0
            # The file object yields bytes; decode each line as iso-8859-1.
            line = str(self.fp.readline(), "iso-8859-1")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                # NOTE(review): `line` is a decoded str at this point while
                # the file object is read as bytes -- confirm that unread()
                # implementations accept a str.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
class HTTPResponse:
    """One HTTP response read from a socket.

    begin() parses the status line and headers and works out how the body
    is delimited (chunked, Content-Length, or connection close); read()
    then returns body bytes.  The response counts as closed once its file
    object has been released (see isclosed()).
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap `sock`; nothing is read until begin() is called.

        sock       -- connected socket; an unbuffered binary file object is
                      made from it with makefile("rb", 0)
        debuglevel -- values > 0 print the status line and headers
        strict     -- if true, a malformed status line raises BadStatusLine
                      instead of being treated as an HTTP/0.9 response
        method     -- request method; "HEAD" forces a zero-length body
        """
        # XXX If the response includes a content-length header, we
        # need to make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  (This only
        # applies to HTTP/1.1 connections.)  Since some clients access
        # self.fp directly rather than calling read(), this is a little
        # tricky.
        self.fp = sock.makefile("rb", 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN      # is "chunked" being used?
        self.chunk_left = _UNKNOWN   # bytes left to read in current chunk
        self.length = _UNKNOWN       # number of bytes left in response
        self.will_close = _UNKNOWN   # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line.

        Returns (version, status, reason) with status as an int.  Raises
        BadStatusLine on EOF, on a non-numeric or out-of-range status code,
        or (in strict mode) when the line does not start with "HTTP/".
        In non-strict mode such a line is pushed back and the response is
        reported as ("HTTP/0.9", 200, "").
        """
        line = str(self.fp.readline(), "iso-8859-1")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith("HTTP/"):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            # Assume it's a Simple-Response from an 0.9 server.
            # We have to convert the first line back to raw bytes
            # because self.fp.readline() needs to return bytes.  Use the
            # codec the line was decoded with so that every byte value
            # round-trips ("ascii" would raise on bytes >= 0x80).
            self.fp = LineAndFileWrapper(bytes(line, "iso-8859-1"), self.fp)
            return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
        except ValueError:
            raise BadStatusLine(line)
        if status < 100 or status > 999:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Parse the status line and headers and set up body framing.

        Does nothing if the response has already been started.  Sets
        status, reason, version, msg, chunked, length and will_close.
        Raises UnknownProtocol for an unrecognised HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.status = status
        self.reason = reason.strip()
        if version == "HTTP/1.0":
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == "HTTP/0.9":
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # No headers in a Simple-Response: the body runs to EOF.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(io.BytesIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print("header:", hdr, end=" ")

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
                100 <= status < 200 or      # 1xx codes
                self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
                not self.chunked and
                self.length is None):
            self.will_close = 1

    def _check_close(self):
        """Return True if the headers say the connection will be closed."""
        conn = self.msg.getheader("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close and release the underlying file object (idempotent)."""
        if self.fp:
            self.fp.close()
            self.fp = None

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    @property
    def closed(self):
        return self.isclosed()

    def flush(self):
        """Flush the underlying file object; a no-op once closed."""
        if self.fp is not None:
            self.fp.flush()

    # End of "raw stream" methods

    def isclosed(self):
        """Return True once the underlying file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to `amt` body bytes (everything if None).

        Returns b"" once the response is closed.  The response is closed
        automatically when the whole body has been consumed.
        """
        if self.fp is None:
            return b""

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read up to `amt` bytes (all if None) of a chunked body.

        Raises IncompleteRead (carrying the bytes gathered so far in this
        call) if a chunk-size line cannot be parsed; the response is closed
        first because protocol synchronisation is probably lost.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Collect pieces and join once instead of repeated concatenation.
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b";")
                if i >= 0:
                    line = line[:i]  # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(b"".join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b"".join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b"".join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == b"\r\n":
                break

        # we read everything; close the "file"
        self.close()

        return b"".join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation, carrying the bytes that were read before the failure.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                # Hand back bytes, like _read_chunked does, not the raw list.
                raise IncompleteRead(b"".join(s))
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def getheader(self, name, default=None):
        """Return header `name`, or `default` if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000654
Greg Stein5e0fa402000-06-26 08:28:01 +0000655
656class HTTPConnection:
657
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 _http_vsn = 11
659 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000660
Greg Steindd6eefb2000-07-18 09:09:48 +0000661 response_class = HTTPResponse
662 default_port = HTTP_PORT
663 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000664 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000665 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000666
def __init__(self, host, port=None, strict=None,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Set up an unconnected connection object.

    host    -- server name, optionally with a ":port" suffix when `port`
               is None (parsed by _set_hostport)
    port    -- explicit port, or None
    strict  -- when not None, stored as self.strict, overriding the
               class-level default
    timeout -- stored for use by connect()
    """
    # No socket, no pending response, no request in progress.
    self.sock = self.__response = self._method = None
    self._buffer = []
    self.__state = _CS_IDLE
    self.timeout = timeout

    self._set_hostport(host, port)
    if strict is None:
        return
    self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
def _set_hostport(self, host, port):
    """Store the target host and port in self.host / self.port.

    When `port` is None it is taken from a trailing ":port" in `host`
    (the last colon after any "]", so bracketed IPv6 literals work), or
    from self.default_port if there is none.  An empty port ("host:") is
    treated the same as no port.  Surrounding "[...]" is stripped from the
    host in this case.  When `port` is given, `host` is stored unchanged.

    Raises InvalidURL if the port suffix is present but not an integer.
    """
    if port is None:
        i = host.rfind(':')
        j = host.rfind(']')         # ipv6 addresses have [...]
        if i > j:
            port_text = host[i+1:]
            if port_text:
                try:
                    port = int(port_text)
                except ValueError:
                    raise InvalidURL("nonnumeric port: '%s'" % port_text)
            else:
                # "host:" means the same as "host": use the default port
                port = self.default_port
            host = host[:i]
        else:
            port = self.default_port
        if host and host[0] == '[' and host[-1] == ']':
            host = host[1:-1]
    self.host = host
    self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000696
def set_debuglevel(self, level):
    """Set the debugging level.

    Stored in self.debuglevel; send() prints what it transmits when the
    level is greater than 0.
    """
    self.debuglevel = level
699
def connect(self):
    """Open a socket to (self.host, self.port) and keep it in self.sock.

    self.timeout is passed through to socket.create_connection().
    """
    address = (self.host, self.port)
    self.sock = socket.create_connection(address, self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000704
def close(self):
    """Close the connection to the HTTP server.

    Closes the socket and any pending response and returns the connection
    to the idle state.  The clean-up of the response and state happens even
    if closing the socket raises, so the object never keeps a reference to
    a half-closed socket; the socket error still propagates.
    """
    try:
        sock = self.sock
        if sock:
            # Drop our reference first so a failing close() cannot leave a
            # dead socket behind for the next send().
            self.sock = None
            sock.close()   # close it manually... there may be other refs
    finally:
        self.__state = _CS_IDLE
        response = self.__response
        if response:
            self.__response = None
            response.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000714
def send(self, str):
    """Send `str' to the server.

    `str` is either a bytes-like object, sent with one sendall() call, or
    an object with a read() method, which is read and sent in blocks of
    8192 bytes until read() returns an empty result.

    If there is no socket yet, connect() is called when self.auto_open is
    true; otherwise NotConnected is raised.  Socket errors propagate to
    the caller; on a broken pipe the connection is closed first so that a
    later send() reconnects.
    """
    # Local import: only the symbolic name for the broken-pipe errno is
    # needed here.
    import errno

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        if hasattr(str, 'read'):
            if self.debuglevel > 0:
                print("sendIng a read()able")
            data = str.read(blocksize)
            while data:
                self.sock.sendall(data)
                data = str.read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        # Guard against argument-less exceptions so the original error is
        # re-raised rather than masked by an IndexError.
        if v.args and v.args[0] == errno.EPIPE:      # Broken pipe
            self.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000744
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000745 def _output(self, s):
746 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000747
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000748 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000749 """
750 self._buffer.append(s)
751
752 def _send_output(self):
753 """Send the currently buffered request and clear the buffer.
754
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000755 Appends an extra \\r\\n to the buffer.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000756 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000757 self._buffer.extend((b"", b""))
758 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000759 del self._buffer[:]
760 self.send(msg)
761
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    The request line is buffered, not transmitted; for HTTP/1.1 the
    Host and Accept-Encoding headers are buffered too unless skipped.
    Raises CannotSendRequest when the connection is not idle.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state == _CS_IDLE:
        self.__state = _CS_REQ_STARTED
    else:
        raise CannotSendRequest()

    # Save the method we use, we need it later in the response phase
    self._method = method
    if not url:
        url = '/'
    request = '%s %s %s' % (method, url, self._http_vsn_str)

    # Non-ASCII characters should have been eliminated earlier
    self._output(request.encode('ascii'))

    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # use the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                nil, netloc, nil, nil, nil = urlsplit(url)

            if netloc:
                try:
                    netloc_enc = netloc.encode("ascii")
                except UnicodeEncodeError:
                    netloc_enc = netloc.encode("idna")
                self.putheader('Host', netloc_enc)
            else:
                try:
                    host_enc = self.host.encode("ascii")
                except UnicodeEncodeError:
                    host_enc = self.host.encode("idna")

                # self.host has had any [...] removed, so a colon here
                # means an IPv6 literal; it must be bracketed again in
                # the header to keep it distinct from a ':port' suffix.
                if host_enc.find(b':') >= 0:
                    host_enc = b'[' + host_enc + b']'

                # Compare with this connection's own default port (not
                # the plain-HTTP constant), so a subclass that overrides
                # default_port does not advertise its standard port.
                if self.port == self.default_port:
                    self.putheader('Host', host_enc)
                else:
                    host_enc = host_enc.decode("ascii")
                    self.putheader('Host', "%s:%s" % (host_enc, self.port))

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000870
def putheader(self, header, value):
    """Buffer one request header line for the server.

    For example: h.putheader('Accept', 'text/html')

    Either argument is ASCII-encoded when it has an encode() method;
    anything else is used as given.  Raises CannotSendHeader unless a
    request has been started and its headers are still open.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    name = header.encode('ascii') if hasattr(header, 'encode') else header
    payload = value.encode('ascii') if hasattr(value, 'encode') else value
    self._output(name + b': ' + payload)
Greg Stein5e0fa402000-06-26 08:28:01 +0000885
def endheaders(self):
    """Indicate that the last header line has been sent to the server.

    Marks the request as sent and transmits the buffered request.
    Raises CannotSendHeader when no request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT

    self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000895
def request(self, method, url, body=None, headers={}):
    """Send a complete request to the server.

    `method', `url', `body' and `headers' are handed to _send_request()
    unchanged.  If the attempt fails with a broken pipe and
    self.auto_open is true, the request is attempted exactly once more;
    every other socket.error is propagated.
    """
    # NOTE: the shared default dict for `headers' is only passed along by
    # this method, never modified here.

    # Local import: only needed to recognise a broken pipe below.
    import errno

    try:
        self._send_request(method, url, body, headers)
    except socket.error as v:
        # trap 'Broken pipe' if we're allowed to automatically reconnect.
        # An error with no args is never a broken pipe; checking for that
        # first avoids masking it with an IndexError.
        broken_pipe = bool(v.args) and v.args[0] == errno.EPIPE
        if not broken_pipe or not self.auto_open:
            raise
        # try one more time
        self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000906
Greg Steindd6eefb2000-07-18 09:09:48 +0000907 def _send_request(self, method, url, body, headers):
Jeremy Hylton2c178252004-08-07 16:28:14 +0000908 # honour explicitly requested Host: and Accept-Encoding headers
909 header_names = dict.fromkeys([k.lower() for k in headers])
910 skips = {}
911 if 'host' in header_names:
912 skips['skip_host'] = 1
913 if 'accept-encoding' in header_names:
914 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000915
Jeremy Hylton2c178252004-08-07 16:28:14 +0000916 self.putrequest(method, url, **skips)
917
918 if body and ('content-length' not in header_names):
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000919 thelen = None
Thomas Wouters89f507f2006-12-13 04:49:30 +0000920 try:
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000921 thelen = str(len(body))
Guido van Rossumb940e112007-01-10 16:19:56 +0000922 except TypeError as te:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000923 # If this is a file-like object, try to
924 # fstat its file descriptor
925 import os
926 try:
927 thelen = str(os.fstat(body.fileno()).st_size)
928 except (AttributeError, OSError):
929 # Don't send a length if this failed
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000930 if self.debuglevel > 0: print("Cannot stat!!")
Thomas Wouters9fe394c2007-02-05 01:24:16 +0000931
Thomas Wouters89f507f2006-12-13 04:49:30 +0000932 if thelen is not None:
933 self.putheader('Content-Length',thelen)
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000934 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +0000935 self.putheader(hdr, value)
936 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000937
Greg Steindd6eefb2000-07-18 09:09:48 +0000938 if body:
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000939 if isinstance(body, str): body = body.encode('ascii')
Greg Steindd6eefb2000-07-18 09:09:48 +0000940 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000941
def getresponse(self):
    """Get the response from the server.

    Builds a self.response_class instance on the current socket, reads
    the start of the response via begin(), and returns it.  Raises
    ResponseNotReady when no request has been sent or an earlier
    response is still open.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    #
    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    # The debug level is passed positionally, and only when it is
    # switched on.
    positional = [self.sock]
    if self.debuglevel > 0:
        positional.append(self.debuglevel)
    response = self.response_class(*positional, strict=self.strict,
                                   method=self._method)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if not response.will_close:
        # remember this, so we can tell when it is complete
        self.__response = response
    else:
        # this effectively passes the connection to the response
        self.close()

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000988
# HTTPS support is optional: everything below is defined only when the
# ssl module can be imported.
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant that wraps its socket with SSL."""

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            """Store the key/certificate file names next to the base setup."""
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file, self.cert_file = key_file, cert_file

        def connect(self):
            """Connect to the host and port, then wrap the socket in SSL."""
            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout)
            self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                        self.cert_file)

    def FakeSocket(sock, sslobj):
        """Deprecated shim: warn, then return `sslobj' unchanged."""
        warnings.warn(
            "FakeSocket is deprecated, and won't be in 3.x. "
            "Use the result of ssl.wrap_socket() directly instead.",
            DeprecationWarning, stacklevel=2)
        return sslobj

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001019
class HTTPException(Exception):
    """Base class of the exceptions defined below."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """Raised by send() when there is no socket and auto-open is off."""


class InvalidURL(HTTPException):
    """Raised when a host string carries a non-numeric port."""


class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in `version'."""

    def __init__(self, version):
        HTTPException.__init__(self, version)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; adds no behaviour of its own."""


class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; adds no behaviour of its own."""


class IncompleteRead(HTTPException):
    """Carries the data read so far in `partial'."""

    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Base class for errors about the connection's request state."""


class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""


class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request is open."""


class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when a response cannot be fetched yet."""


class BadStatusLine(HTTPException):
    """Carries the offending status line in `line'."""

    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line


# for backwards compatibility
error = HTTPException
1066
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Wrap `file' so that `line' is read back before its contents."""
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Anything not defined here is looked up on the wrapped file.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        for name in ('read', 'readline', 'readlines'):
            setattr(self, name, getattr(self._file, name))

    def _take_rest(self):
        # Return the unread tail of the saved line and switch over to
        # the underlying file.
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def read(self, amt=None):
        """Read up to `amt' bytes, the saved line first, then the file."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left

        if amt is not None and not amt > self._line_left:
            # The request fits entirely inside the saved line.
            assert amt <= self._line_left
            start = self._line_offset
            stop = start + amt
            piece = self._line[start:stop]
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return piece

        head = self._take_rest()
        if amt is None:
            return head + self._file.read()
        return head + self._file.read(amt - len(head))

    def readline(self):
        """Return the rest of the saved line, later the file's lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        return self._take_rest()

    def readlines(self, size=None):
        """Return the rest of the saved line plus the file's readlines()."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        lines = [self._take_rest()]
        if size is None:
            return lines + self._file.readlines()
        return lines + self._file.readlines(size)