blob: a6ac4e303451e80b5b7fbcfaae8b173314556379 [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000070import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000071import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000072import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000073from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000074
Thomas Wouters47b49bf2007-08-30 22:15:33 +000075__all__ = ["HTTPResponse", "HTTPConnection",
Skip Montanaro951a8842001-06-01 16:25:38 +000076 "HTTPException", "NotConnected", "UnknownProtocol",
Jeremy Hylton7c75c992002-06-28 23:38:14 +000077 "UnknownTransferEncoding", "UnimplementedFileMode",
78 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
79 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
Georg Brandl6aab16e2006-02-17 19:17:25 +000080 "BadStatusLine", "error", "responses"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
HTTP_PORT = 80
HTTPS_PORT = 443

_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to their standard reason phrases.  Every status
# constant defined above has an entry, so responses[CONSTANT] never fails;
# 306 is kept for completeness although it has no constant.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
203
class HTTPMessage(mimetools.Message):
    """Header container that merges repeated HTTP header fields."""

    def addheader(self, key, value):
        """Record value for field key, comma-joining it onto an earlier one."""
        existing = self.dict.get(key)
        if existing is not None:
            value = ", ".join((existing, value))
        self.dict[key] = value

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the field stored at key."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Parse header lines from self.fp into self.dict and self.headers.

        Reading stops at the blank line that terminates the headers; that
        line is consumed but not stored.  If a line that is not a header
        shows up instead (an error), the method tries to push it back onto
        the stream, and it is never stored either.

        On success self.status is the empty string; otherwise it holds an
        error message.  self.headers receives the raw header lines exactly
        as read, so printing them reproduces the header block.

        Header fields that occur more than once are combined following
        RFC 2616 sec 4.2: each later field-value is appended to the first,
        separated by a comma, preserving the order in which they arrived.
        """
        # XXX This overrides rfc822.Message.readheaders().  The base class
        # is not designed for customization here, so this is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        current = ""
        at_start = True
        line_start = push_back = where = None
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            where = self.fp.tell
        while True:
            if where:
                try:
                    line_start = where()
                except IOError:
                    # The stream cannot report its position after all.
                    line_start = where = None
                    self.seekable = 0
            line = str(self.fp.readline(), "iso-8859-1")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if at_start and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            at_start = False
            if current and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # Leading whitespace marks a continuation of the last field.
                raw_lines.append(line)
                self.addcontinue(current, line.strip())
                continue
            if self.iscomment(line):
                # Comments are dropped.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            current = self.isheader(line)
            if current:
                # A legal header line: keep the raw text and the value.
                raw_lines.append(line)
                self.addheader(current, line[len(current)+1:].strip())
                continue
            # Not a header line; report it, try to undo the read, and stop.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            if push_back:
                push_back(line)
            elif where:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            break
307 break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
class HTTPResponse:
    """Response to one request, read incrementally from a socket.

    begin() parses the status line and headers; read() then returns the
    body as bytes, honoring chunked transfer coding and Content-Length.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap sock in an unbuffered binary file; nothing is read yet.

        debuglevel > 0 prints the status line and headers as they are read.
        strict selects BadStatusLine over the HTTP/0.9 fallback.
        method is the request method; "HEAD" implies an empty body.
        """
        # XXX If the response includes a content-length header, we
        # need to make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  (This only
        # applies to HTTP/1.1 connections.)  Since some clients access
        # self.fp directly rather than calling read(), this is a little
        # tricky.
        self.fp = sock.makefile("rb", 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read the status line and return (version, status, reason).

        Raises BadStatusLine if the line is empty, if the status code is not
        an integer in 100..999, or (strict mode only) if the line does not
        start with "HTTP/".  In non-strict mode such a line is treated as
        the start of an HTTP/0.9 Simple-Response and pushed back onto
        self.fp.
        """
        # Initialize with Simple-Response defaults.
        line = str(self.fp.readline(), "iso-8859-1")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith("HTTP/"):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # Assume it's a Simple-Response from an 0.9 server.
                # We have to convert the first line back to raw bytes
                # because self.fp.readline() needs to return bytes.
                # Use the codec the line was decoded with so that any
                # byte value round-trips; "ascii" would raise on bytes
                # above 0x7f.
                self.fp = LineAndFileWrapper(bytes(line, "iso-8859-1"),
                                             self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body-reading state.

        Does nothing if the headers were already read.  Sets status,
        reason, version, msg, chunked, length and will_close.  Raises
        UnknownProtocol for an unrecognized HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.status = status
        self.reason = reason.strip()
        if version == "HTTP/1.0":
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11  # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == "HTTP/0.9":
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: body runs until close.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(io.BytesIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print("header:", hdr, end=" ")

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # Unparseable Content-Length: treat the length as unknown.
                pass

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = 1

    def _check_close(self):
        """Return True if the headers say the connection will be closed."""
        conn = self.msg.getheader("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file, if still open, and forget it."""
        if self.fp:
            self.fp.close()
            self.fp = None

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    @property
    def closed(self):
        """True once the response's file has been closed."""
        return self.isclosed()

    def flush(self):
        """Flush the underlying file."""
        self.fp.flush()

    # End of "raw stream" methods

    def isclosed(self):
        """Return True if the response's file has been closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Return body data as bytes.

        With amt=None, read everything that remains and close the response.
        Otherwise read at most amt bytes, clipped to the remaining
        Content-Length when one is known.  A closed response yields b"".
        """
        if self.fp is None:
            # The body is bytes everywhere else, so the empty result is too.
            return b""

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()  # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if self.length is not None:
            self.length -= len(s)

        return s

    def _read_chunked(self, amt):
        """Read up to amt bytes (all of them if amt is None) of chunked body.

        self.chunk_left carries the unread remainder of the current chunk
        between calls.  After the terminating zero-size chunk the trailer
        is discarded and the response is closed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Collect the pieces and join once; repeated concatenation gets
        # expensive as the number or size of chunks grows.
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b";")
                if i >= 0:
                    line = line[:i]  # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b"".join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b"".join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            # An empty read means EOF; stop rather than spin forever when
            # the server drops the connection before the final CRLF.
            if not line or line == b"\r\n":
                break

        # we read everything; close the "file"
        self.close()

        return b"".join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation, carrying the bytes received so far.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(b"".join(s))
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def getheader(self, name, default=None):
        """Return the value of header name, or default if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000638
Greg Stein5e0fa402000-06-26 08:28:01 +0000639
640class HTTPConnection:
641
Greg Steindd6eefb2000-07-18 09:09:48 +0000642 _http_vsn = 11
643 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000644
Greg Steindd6eefb2000-07-18 09:09:48 +0000645 response_class = HTTPResponse
646 default_port = HTTP_PORT
647 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000648 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000649 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000650
Guido van Rossumd8faa362007-04-27 19:54:29 +0000651 def __init__(self, host, port=None, strict=None, timeout=None):
652 self.timeout = timeout
Greg Steindd6eefb2000-07-18 09:09:48 +0000653 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000654 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 self.__response = None
656 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000657 self._method = None
Tim Petersc411dba2002-07-16 21:35:23 +0000658
Greg Steindd6eefb2000-07-18 09:09:48 +0000659 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000660 if strict is not None:
661 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 def _set_hostport(self, host, port):
664 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000665 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000666 j = host.rfind(']') # ipv6 addresses have [...]
667 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000668 try:
669 port = int(host[i+1:])
670 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000671 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000672 host = host[:i]
673 else:
674 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000675 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000676 host = host[1:-1]
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 self.host = host
678 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
Jeremy Hylton30f86742000-09-18 22:50:38 +0000680 def set_debuglevel(self, level):
681 self.debuglevel = level
682
Greg Steindd6eefb2000-07-18 09:09:48 +0000683 def connect(self):
684 """Connect to the host and port specified in __init__."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000685 self.sock = socket.create_connection((self.host,self.port),
686 self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000687
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 def close(self):
689 """Close the connection to the HTTP server."""
690 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000691 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000692 self.sock = None
693 if self.__response:
694 self.__response.close()
695 self.__response = None
696 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000697
Greg Steindd6eefb2000-07-18 09:09:48 +0000698 def send(self, str):
699 """Send `str' to the server."""
700 if self.sock is None:
701 if self.auto_open:
702 self.connect()
703 else:
704 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000705
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000706 # send the data to the server. if we get a broken pipe, then close
Greg Steindd6eefb2000-07-18 09:09:48 +0000707 # the socket. we want to reconnect when somebody tries to send again.
708 #
709 # NOTE: we DO propagate the error, though, because we cannot simply
710 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000711 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000712 print("send:", repr(str))
Greg Steindd6eefb2000-07-18 09:09:48 +0000713 try:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000714 blocksize=8192
715 if hasattr(str,'read') :
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000716 if self.debuglevel > 0: print("sendIng a read()able")
Thomas Wouters89f507f2006-12-13 04:49:30 +0000717 data=str.read(blocksize)
718 while data:
719 self.sock.sendall(data)
720 data=str.read(blocksize)
721 else:
722 self.sock.sendall(str)
Guido van Rossumb940e112007-01-10 16:19:56 +0000723 except socket.error as v:
Guido van Rossum89df2452007-03-19 22:26:27 +0000724 if v.args[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000725 self.close()
726 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000727
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000728 def _output(self, s):
729 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000730
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000731 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000732 """
733 self._buffer.append(s)
734
735 def _send_output(self):
736 """Send the currently buffered request and clear the buffer.
737
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000738 Appends an extra \\r\\n to the buffer.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000739 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000740 self._buffer.extend((b"", b""))
741 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000742 del self._buffer[:]
743 self.send(msg)
744
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000745 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000746 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000747
Greg Steindd6eefb2000-07-18 09:09:48 +0000748 `method' specifies an HTTP request method, e.g. 'GET'.
749 `url' specifies the object being requested, e.g. '/index.html'.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000750 `skip_host' if True does not add automatically a 'Host:' header
751 `skip_accept_encoding' if True does not add automatically an
752 'Accept-Encoding:' header
Greg Steindd6eefb2000-07-18 09:09:48 +0000753 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000754
Greg Stein616a58d2003-06-24 06:35:19 +0000755 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +0000756 if self.__response and self.__response.isclosed():
757 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000758
Tim Peters58eb11c2004-01-18 20:29:55 +0000759
Greg Steindd6eefb2000-07-18 09:09:48 +0000760 # in certain cases, we cannot issue another request on this connection.
761 # this occurs when:
762 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
763 # 2) a response to a previous request has signalled that it is going
764 # to close the connection upon completion.
765 # 3) the headers for the previous response have not been read, thus
766 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
767 #
768 # if there is no prior response, then we can request at will.
769 #
770 # if point (2) is true, then we will have passed the socket to the
771 # response (effectively meaning, "there is no prior response"), and
772 # will open a new one when a new request is made.
773 #
774 # Note: if a prior response exists, then we *can* start a new request.
775 # We are not allowed to begin fetching the response to this new
776 # request, however, until that prior response is complete.
777 #
778 if self.__state == _CS_IDLE:
779 self.__state = _CS_REQ_STARTED
780 else:
781 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000782
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000783 # Save the method we use, we need it later in the response phase
784 self._method = method
Greg Steindd6eefb2000-07-18 09:09:48 +0000785 if not url:
786 url = '/'
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000787 request = '%s %s %s' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000788
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000789 # Non-ASCII characters should have been eliminated earlier
790 self._output(request.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000791
Greg Steindd6eefb2000-07-18 09:09:48 +0000792 if self._http_vsn == 11:
793 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000794
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000795 if not skip_host:
796 # this header is issued *only* for HTTP/1.1
797 # connections. more specifically, this means it is
798 # only issued when the client uses the new
799 # HTTPConnection() class. backwards-compat clients
800 # will be using HTTP/1.0 and those clients may be
801 # issuing this header themselves. we should NOT issue
802 # it twice; some web servers (such as Apache) barf
803 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000804
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000805 # If we need a non-standard port,include it in the
806 # header. If the request is going through a proxy,
807 # but the host of the actual URL, not the host of the
808 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000809
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000810 netloc = ''
811 if url.startswith('http'):
812 nil, netloc, nil, nil, nil = urlsplit(url)
813
814 if netloc:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000815 try:
816 netloc_enc = netloc.encode("ascii")
817 except UnicodeEncodeError:
818 netloc_enc = netloc.encode("idna")
819 self.putheader('Host', netloc_enc)
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000820 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000821 try:
822 host_enc = self.host.encode("ascii")
823 except UnicodeEncodeError:
824 host_enc = self.host.encode("idna")
825 if self.port == HTTP_PORT:
826 self.putheader('Host', host_enc)
827 else:
828 self.putheader('Host', "%s:%s" % (host_enc, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000829
Greg Steindd6eefb2000-07-18 09:09:48 +0000830 # note: we are assuming that clients will not attempt to set these
831 # headers since *this* library must deal with the
832 # consequences. this also means that when the supporting
833 # libraries are updated to recognize other forms, then this
834 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000835
Greg Steindd6eefb2000-07-18 09:09:48 +0000836 # we only want a Content-Encoding of "identity" since we don't
837 # support encodings such as x-gzip or x-deflate.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000838 if not skip_accept_encoding:
839 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000840
Greg Steindd6eefb2000-07-18 09:09:48 +0000841 # we can accept "chunked" Transfer-Encodings, but no others
842 # NOTE: no TE header implies *only* "chunked"
843 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000844
Greg Steindd6eefb2000-07-18 09:09:48 +0000845 # if TE is supplied in the header, then it must appear in a
846 # Connection header.
847 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000848
Greg Steindd6eefb2000-07-18 09:09:48 +0000849 else:
850 # For HTTP/1.0, the server will assume "not chunked"
851 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000852
Greg Steindd6eefb2000-07-18 09:09:48 +0000853 def putheader(self, header, value):
854 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000855
Greg Steindd6eefb2000-07-18 09:09:48 +0000856 For example: h.putheader('Accept', 'text/html')
857 """
858 if self.__state != _CS_REQ_STARTED:
859 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000860
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000861 header = '%s: %s' % (header, value)
862 self._output(header.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000863
Greg Steindd6eefb2000-07-18 09:09:48 +0000864 def endheaders(self):
865 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000866
Greg Steindd6eefb2000-07-18 09:09:48 +0000867 if self.__state == _CS_REQ_STARTED:
868 self.__state = _CS_REQ_SENT
869 else:
870 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000871
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000872 self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000873
Greg Steindd6eefb2000-07-18 09:09:48 +0000874 def request(self, method, url, body=None, headers={}):
875 """Send a complete request to the server."""
Greg Steindd6eefb2000-07-18 09:09:48 +0000876 try:
877 self._send_request(method, url, body, headers)
Guido van Rossumb940e112007-01-10 16:19:56 +0000878 except socket.error as v:
Greg Steindd6eefb2000-07-18 09:09:48 +0000879 # trap 'Broken pipe' if we're allowed to automatically reconnect
Guido van Rossum89df2452007-03-19 22:26:27 +0000880 if v.args[0] != 32 or not self.auto_open:
Greg Steindd6eefb2000-07-18 09:09:48 +0000881 raise
882 # try one more time
883 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000884
Greg Steindd6eefb2000-07-18 09:09:48 +0000885 def _send_request(self, method, url, body, headers):
Jeremy Hylton2c178252004-08-07 16:28:14 +0000886 # honour explicitly requested Host: and Accept-Encoding headers
887 header_names = dict.fromkeys([k.lower() for k in headers])
888 skips = {}
889 if 'host' in header_names:
890 skips['skip_host'] = 1
891 if 'accept-encoding' in header_names:
892 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000893
Jeremy Hylton2c178252004-08-07 16:28:14 +0000894 self.putrequest(method, url, **skips)
895
896 if body and ('content-length' not in header_names):
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000897 thelen = None
Thomas Wouters89f507f2006-12-13 04:49:30 +0000898 try:
Jeremy Hylton4b878bd2007-08-10 18:49:01 +0000899 thelen = str(len(body))
Guido van Rossumb940e112007-01-10 16:19:56 +0000900 except TypeError as te:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000901 # If this is a file-like object, try to
902 # fstat its file descriptor
903 import os
904 try:
905 thelen = str(os.fstat(body.fileno()).st_size)
906 except (AttributeError, OSError):
907 # Don't send a length if this failed
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000908 if self.debuglevel > 0: print("Cannot stat!!")
Thomas Wouters9fe394c2007-02-05 01:24:16 +0000909
Thomas Wouters89f507f2006-12-13 04:49:30 +0000910 if thelen is not None:
911 self.putheader('Content-Length',thelen)
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000912 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +0000913 self.putheader(hdr, value)
914 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000915
Greg Steindd6eefb2000-07-18 09:09:48 +0000916 if body:
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000917 if isinstance(body, str): body = body.encode('ascii')
Greg Steindd6eefb2000-07-18 09:09:48 +0000918 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000919
Greg Steindd6eefb2000-07-18 09:09:48 +0000920 def getresponse(self):
Jeremy Hyltonfb35f652007-08-03 20:30:33 +0000921 """Get the response from the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000922
Greg Stein616a58d2003-06-24 06:35:19 +0000923 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +0000924 if self.__response and self.__response.isclosed():
925 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000926
Greg Steindd6eefb2000-07-18 09:09:48 +0000927 #
928 # if a prior response exists, then it must be completed (otherwise, we
929 # cannot read this response's header to determine the connection-close
930 # behavior)
931 #
932 # note: if a prior response existed, but was connection-close, then the
933 # socket and response were made independent of this HTTPConnection
934 # object since a new request requires that we open a whole new
935 # connection
936 #
937 # this means the prior response had one of two states:
938 # 1) will_close: this connection was reset and the prior socket and
939 # response operate independently
940 # 2) persistent: the response was retained and we await its
941 # isclosed() status to become true.
942 #
943 if self.__state != _CS_REQ_SENT or self.__response:
944 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000945
Jeremy Hylton30f86742000-09-18 22:50:38 +0000946 if self.debuglevel > 0:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000947 response = self.response_class(self.sock, self.debuglevel,
Tim Petersc2659cf2003-05-12 20:19:37 +0000948 strict=self.strict,
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000949 method=self._method)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000950 else:
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000951 response = self.response_class(self.sock, strict=self.strict,
952 method=self._method)
Greg Stein5e0fa402000-06-26 08:28:01 +0000953
Jeremy Hylton39c03802002-07-12 14:04:09 +0000954 response.begin()
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000955 assert response.will_close != _UNKNOWN
Greg Steindd6eefb2000-07-18 09:09:48 +0000956 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000957
Greg Steindd6eefb2000-07-18 09:09:48 +0000958 if response.will_close:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959 # this effectively passes the connection to the response
960 self.close()
Greg Steindd6eefb2000-07-18 09:09:48 +0000961 else:
962 # remember this, so we can tell when it is complete
963 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000964
Greg Steindd6eefb2000-07-18 09:09:48 +0000965 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000966
# HTTPS support is optional: HTTPSConnection is only defined (and added to
# __all__) when the ssl module can be imported.
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """This class allows communication via SSL.

        It behaves like HTTPConnection, except that connect() wraps the
        socket using the ssl module and the default port is HTTPS_PORT.
        """

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None):
            """Store the SSL key/certificate file names.

            `host', `port', `strict' and `timeout' are forwarded to
            HTTPConnection.__init__; `key_file' and `cert_file' are kept
            for use by connect().
            """
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            # Open a plain TCP connection first, then wrap it with SSL.
            sock = socket.create_connection((self.host, self.port), self.timeout)
            self.sock = ssl.sslsocket(sock, self.key_file, self.cert_file)


    def FakeSocket (sock, sslobj):
        """Return `sslobj' unchanged; `sock' is ignored."""
        # NOTE(review): presumably kept only so that older callers of
        # FakeSocket keep working -- confirm before removing.
        return sslobj

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +0000994
class HTTPException(Exception):
    """Common base class of the HTTP client exceptions in this module."""

    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +0000999
class NotConnected(HTTPException):
    """Raised by send() when no socket is open and auto_open is false."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001002
class InvalidURL(HTTPException):
    """HTTPException subclass for invalid URLs."""

    # NOTE(review): the raise sites are outside this part of the file;
    # confirm the exact trigger there.
1005
class UnknownProtocol(HTTPException):
    """HTTPException subclass that records a protocol version.

    The version is available as the `version' attribute and as the
    only item of `args'.
    """

    def __init__(self, version):
        self.version = version
        # args must be set explicitly so that str() works.
        self.args = (version,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001010
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass for an unknown transfer encoding."""

    # NOTE(review): the raise sites are outside this part of the file;
    # confirm the exact trigger there.
Greg Stein5e0fa402000-06-26 08:28:01 +00001013
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass for an unimplemented file mode."""

    # NOTE(review): the raise sites are outside this part of the file;
    # confirm the exact trigger there.
Greg Stein5e0fa402000-06-26 08:28:01 +00001016
class IncompleteRead(HTTPException):
    """HTTPException subclass that carries partially read data.

    The data is available as the `partial' attribute and as the only
    item of `args'.
    """

    def __init__(self, partial):
        self.partial = partial
        # args must be set explicitly so that str() works.
        self.args = (partial,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001021
class ImproperConnectionState(HTTPException):
    """Base class for errors about using a connection in the wrong state.

    CannotSendRequest, CannotSendHeader and ResponseNotReady derive
    from it.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +00001024
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001027
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is started."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001030
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet.

    That is the case when no request has been completely sent or when
    an earlier response is still open.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +00001033
class BadStatusLine(HTTPException):
    """HTTPException subclass that carries an offending status line.

    The line is available as the `line' attribute and as the only item
    of `args'.
    """

    def __init__(self, line):
        self.line = line
        # args must be set explicitly so that str() works.
        self.args = (line,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001038
# for backwards compatibility: `error' is an alias of HTTPException, so
# code that catches `error' catches every exception derived from it.
error = HTTPException
1041
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper first serves `line', which has already been read from
    `file', and then the remaining contents of `file'.  Once the line
    has been used up, read(), readline() and readlines() are rebound
    to the methods of the underlying file.  Any other attribute is
    looked up on the underlying file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Wrap `file', putting the already-read `line' in front of it."""
        self._line = line
        self._file = file
        self._line_consumed = False
        self._line_offset = 0       # index of the first unread item of line
        self._line_left = len(line)  # number of unread items of line

    def __getattr__(self, attr):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = True
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' items, starting with the rest of the line.

        With `amt' None or negative, everything up to the end of the
        underlying file is returned.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative size conventionally means "read until EOF".  Without
        # this, it would be treated as a short read and corrupt the
        # offset bookkeeping below.
        if amt is not None and amt < 0:
            amt = None
        if amt is None or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the line followed by the file's lines.

        `size' is only passed on to the underlying file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)