blob: 3f9e67bba482a63d8297919a3deb5414ed4d3b24 [file] [log] [blame]
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Barry Warsaw820c1202008-06-12 04:06:45 +000069import email.parser
70import email.message
Jeremy Hylton636950f2009-03-28 04:34:21 +000071import io
72import os
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +020073import re
Jeremy Hylton636950f2009-03-28 04:34:21 +000074import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000075import collections
Jeremy Hylton1afc1692008-06-18 20:49:58 +000076from urllib.parse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000077
# Names exported by "from ... import *".  HTTPMessage, parse_headers(), and
# the HTTP status code constants are intentionally omitted for simplicity.
__all__ = [
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "LineTooLong",
    "error",
    "responses",
]
Skip Montanaro2dd42762001-01-23 15:35:05 +000086
# default TCP ports
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes until the response is parsed
_UNKNOWN = 'UNKNOWN'

# connection states (see the state table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes

# 1xx: informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# 2xx: successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# 3xx: redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# 4xx: client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426
PRECONDITION_REQUIRED = 428
TOO_MANY_REQUESTS = 429
REQUEST_HEADER_FIELDS_TOO_LARGE = 431

# 5xx: server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
NETWORK_AUTHENTICATION_REQUIRED = 511
Martin v. Löwis39a31782004-09-18 09:03:49 +0000160
# Mapping status codes to official W3C names.
#
# Every status code that has a module-level constant also has an entry
# here, so that responses[code] works for any of those constants.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',
    428: 'Precondition Required',
    429: 'Too Many Requests',
    431: 'Request Header Fields Too Large',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
    511: 'Network Authentication Required',
}
213
# maximal amount of data to read at one time in _safe_read (1 MiB)
MAXAMOUNT = 1024 * 1024

# maximal line length when calling readline() (64 KiB)
_MAXLINE = 64 * 1024
# maximal number of header lines accepted by parse_headers()
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# The patterns for both name and value are more lenient than the RFC
# definitions to allow for backwards compatibility.
#
# A legal name starts with anything but ":" or whitespace and contains
# no ":", CR or LF afterwards.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# An illegal value contains a LF not followed by SP/HTAB, or a CR not
# followed by SP/HTAB/LF (i.e. a line break that is not a folded line).
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
248
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000249
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the headers and return a list of "Name: value"
        strings, one per occurrence of the header, in message order.
        Whatever text the message stores as the header's value is
        included as-is.  If the header does not occur, an empty list is
        returned.  Case is not important in the header name.

        """
        # The header names yielded by a Message never include the
        # trailing colon, so compare bare names and rebuild the line.
        wanted = name.lower()
        return ['%s: %s' % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000279
def parse_headers(fp, _class=HTTPMessage):
    """Parses only RFC2822 headers from a file pointer.

    email Parser wants to see strings rather than bytes.
    But a TextIOWrapper around self.rfile would buffer too many bytes
    from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.

    Lines are read from fp up to and including the first blank line (or
    EOF).  LineTooLong is raised for a line longer than _MAXLINE bytes and
    HTTPException when more than _MAXHEADERS lines have been read.  The
    result is an instance of _class.

    """
    end_of_headers = (b'\r\n', b'\n', b'')
    raw_lines = []
    while True:
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(raw)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in end_of_headers:
            break
    text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(text)
Greg Stein5e0fa402000-06-26 08:28:01 +0000302
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000303
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000304class HTTPResponse(io.RawIOBase):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000305
306 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
307
Jeremy Hylton811fc142007-08-03 13:30:02 +0000308 # The bytes from the socket object are iso-8859-1 strings.
309 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
310 # text following RFC 2047. The basic status line parsing only
311 # accepts iso-8859-1.
312
def __init__(self, sock, debuglevel=0, method=None, url=None):
    """Wrap *sock* in a binary file object and reset the parse state.

    sock must provide makefile("rb").  debuglevel > 0 enables debug
    printing while the response is parsed.  method is the request method
    (a "HEAD" response is treated as having no body).  url is accepted
    but not used by this constructor.
    """
    # If the response includes a content-length header, we need to
    # make sure that the client doesn't read more than the
    # specified number of bytes.  If it does, it will block until
    # the server times out and closes the connection.  This will
    # happen if a self.fp.read() is done (without a size) whether
    # self.fp is buffered or not.  So, no self.fp.read() by
    # clients unless they know what they are doing.
    self.fp = sock.makefile("rb")
    self.debuglevel = debuglevel
    self._method = method

    # The HTTPResponse object is returned via urllib.  The clients
    # of http and urllib expect different attributes for the
    # headers.  headers is used here and supports urllib.  msg is
    # provided as a backwards compatibility layer for http
    # clients.
    self.headers = self.msg = None

    # from the Status-Line of the response:
    # HTTP-Version, Status-Code and Reason-Phrase
    self.version = self.status = self.reason = _UNKNOWN

    # body framing, filled in once the headers have been read
    self.chunked = _UNKNOWN         # is "chunked" being used?
    self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
    self.length = _UNKNOWN          # number of bytes left in response
    self.will_close = _UNKNOWN      # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000342
def _read_status(self):
    """Read and parse the status line.

    Returns a (version, status, reason) tuple where status is an int.
    Raises LineTooLong if the line exceeds _MAXLINE and BadStatusLine if
    the line is empty, does not start with "HTTP/", or does not carry a
    status code between 100 and 999.
    """
    raw = self.fp.readline(_MAXLINE + 1)
    line = raw.decode("iso-8859-1")
    if len(line) > _MAXLINE:
        raise LineTooLong("status line")
    if self.debuglevel > 0:
        print("reply:", repr(line))
    if not line:
        # Presumably, the server closed the connection before
        # sending a valid response.
        raise BadStatusLine(line)

    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        # no reason phrase
        version, status = fields
        reason = ""
    else:
        # empty version will cause next test to fail.
        version = ""
    if not version.startswith("HTTP/"):
        self._close_conn()
        raise BadStatusLine(line)

    # The status code is a three-digit number
    try:
        status = int(status)
    except ValueError:
        raise BadStatusLine(line)
    if not 100 <= status <= 999:
        raise BadStatusLine(line)
    return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000374
def begin(self):
    """Read the status line and headers and set up body framing.

    Does nothing if the headers were already read.  Otherwise sets
    code/status, reason, version, headers/msg, chunked, chunk_left,
    will_close and length.  Raises UnknownProtocol for a version that is
    neither HTTP/0.9 nor HTTP/1.x, and LineTooLong for an over-long
    header line following a 100 response.
    """
    if self.headers is not None:
        # we've already started reading the response
        return

    # read until we get a non-100 response
    while True:
        version, status, reason = self._read_status()
        if status != CONTINUE:
            break
        # skip the header from the 100 response
        while True:
            skip = self.fp.readline(_MAXLINE + 1)
            if len(skip) > _MAXLINE:
                raise LineTooLong("header line")
            skip = skip.strip()
            if not skip:
                break
            if self.debuglevel > 0:
                print("header:", skip)

    self.code = self.status = status
    self.reason = reason.strip()
    if version in ("HTTP/1.0", "HTTP/0.9"):
        # Some servers might still return "0.9", treat it as 1.0 anyway
        self.version = 10
    elif version.startswith("HTTP/1."):
        self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
    else:
        raise UnknownProtocol(version)

    self.headers = self.msg = parse_headers(self.fp)

    if self.debuglevel > 0:
        # one "name: value" pair per line, so the debug output shows the
        # header values and not just the names
        for hdr, val in self.headers.items():
            print("header:", hdr + ":", val)

    # are we using the chunked-style of transfer encoding?
    tr_enc = self.headers.get("transfer-encoding")
    if tr_enc and tr_enc.lower() == "chunked":
        self.chunked = True
        self.chunk_left = None
    else:
        self.chunked = False

    # will the connection close at the end of the response?
    self.will_close = self._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    self.length = None
    length = self.headers.get("content-length")
    if length and not self.chunked:
        try:
            self.length = int(length)
        except ValueError:
            self.length = None
        else:
            if self.length < 0:  # ignore nonsensical negative lengths
                self.length = None

    # does the body have a fixed length? (of zero)
    if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
        self.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not self.will_close and
            not self.chunked and
            self.length is None):
        self.will_close = True
Greg Stein5e0fa402000-06-26 08:28:01 +0000454
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000455 def _check_close(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000456 conn = self.headers.get("connection")
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000457 if self.version == 11:
458 # An HTTP/1.1 proxy is assumed to stay open unless
459 # explicitly closed.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000460 conn = self.headers.get("connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000461 if conn and "close" in conn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000462 return True
463 return False
464
Jeremy Hylton2c178252004-08-07 16:28:14 +0000465 # Some HTTP/1.0 implementations have support for persistent
466 # connections, using rules different than HTTP/1.1.
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000467
Christian Heimes895627f2007-12-08 17:28:33 +0000468 # For older HTTP, Keep-Alive indicates persistent connection.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000469 if self.headers.get("keep-alive"):
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000470 return False
Tim Peters77c06fb2002-11-24 02:35:35 +0000471
Jeremy Hylton2c178252004-08-07 16:28:14 +0000472 # At least Akamai returns a "Connection: Keep-Alive" header,
473 # which was supposed to be sent by the client.
474 if conn and "keep-alive" in conn.lower():
475 return False
476
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000477 # Proxy-Connection is a netscape hack.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000478 pconn = self.headers.get("proxy-connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000479 if pconn and "keep-alive" in pconn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000480 return False
481
482 # otherwise, assume it will close
483 return True
484
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200485 def _close_conn(self):
486 fp = self.fp
487 self.fp = None
488 fp.close()
489
def close(self):
    """Mark the response closed and release the underlying file object."""
    try:
        super().close()  # set "closed" flag
    finally:
        # Release the file object even if the base-class close() raises,
        # so the connection is not leaked.
        if self.fp:
            self._close_conn()
Greg Stein5e0fa402000-06-26 08:28:01 +0000494
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000495 # These implementations are for the benefit of io.BufferedReader.
496
497 # XXX This class should probably be revised to act more like
498 # the "raw stream" that BufferedReader expects.
499
def flush(self):
    """Flush this object and, if still attached, the underlying file."""
    super().flush()
    stream = self.fp
    if stream:
        stream.flush()
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000504
def readable(self):
    """Report that this stream supports reading (always True)."""
    return True
507
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000508 # End of "raw stream" methods
509
def isclosed(self):
    """True if the connection is closed."""
    # NOTE: it is possible that we will not ever call self.close(). This
    #       case occurs when will_close is TRUE, length is None, and we
    #       read up to the last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    detached = self.fp is None
    return detached
519
def read(self, amt=None):
    """Read and return the response body, or up to amt bytes of it.

    Returns b"" once the response has been closed and for responses to
    HEAD requests.  With amt given, the read is delegated to the base
    class (which is implemented in terms of self.readinto).  Without amt
    the whole remaining body is returned and the connection is released.
    """
    if self.fp is None:
        return b""

    if self._method == "HEAD":
        self._close_conn()
        return b""

    if amt is not None:
        # Amount is given, so call base class version
        return super(HTTPResponse, self).read(amt)

    # Amount is not given (unbounded read) so we must check self.length
    # and self.chunked
    if self.chunked:
        return self._readall_chunked()

    if self.length is None:
        data = self.fp.read()
    else:
        try:
            data = self._safe_read(self.length)
        except IncompleteRead:
            self._close_conn()
            raise
    self.length = 0
    self._close_conn()        # we read everything
    return data
550
def readinto(self, b):
    """Read body bytes into buffer b and return the number of bytes read.

    Returns 0 once the response has been closed and for responses to
    HEAD requests.  Chunked bodies are delegated to _readinto_chunked().
    The connection is released when the known length is exhausted or
    when the underlying read returns nothing for a non-empty buffer.
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    remaining = self.length
    if remaining is not None and len(b) > remaining:
        # clip the read to the "end of response"
        b = memoryview(b)[0:remaining]

    # we do not use _safe_read() here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks)
    count = self.fp.readinto(b)
    if not count and b:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
Greg Steindd6eefb2000-07-18 09:09:48 +0000580
def _read_next_chunk_size(self):
    """Read a chunk-size line and return the size as an int.

    Raises LineTooLong for an over-long line.  If the size is not valid
    hexadecimal, the connection is closed and the ValueError propagates.
    """
    # Read the next chunk size from the file
    line = self.fp.readline(_MAXLINE + 1)
    if len(line) > _MAXLINE:
        raise LineTooLong("chunk size")
    # strip chunk-extensions: keep only what precedes the first ";"
    size_field = line.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # close the connection as protocol synchronisation is
        # probably lost
        self._close_conn()
        raise
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000596
def _read_and_discard_trailer(self):
    """Consume trailer lines up to and including the blank terminator.

    Raises LineTooLong if a trailer line exceeds _MAXLINE.
    """
    # read and discard trailer up to the CRLF terminator
    ### note: we shouldn't have any trailers!
    while True:
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("trailer line")
        # b'' covers the vanishingly small number of sites that EOF
        # without sending the trailer
        if line in (b'', b'\r\n', b'\n'):
            return
610
def _readall_chunked(self):
    """Read the rest of a chunked body and return it as bytes.

    Continues from self.chunk_left when a chunk was partially read.
    Raises IncompleteRead (carrying the data collected so far) when a
    chunk size cannot be parsed.  Closes the connection after the
    trailer has been consumed.
    """
    assert self.chunked != _UNKNOWN
    pending = self.chunk_left
    pieces = []
    while True:
        if pending is None:
            try:
                pending = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b''.join(pieces))
            if pending == 0:
                # zero-sized chunk marks the end of the body
                break
        pieces.append(self._safe_read(pending))

        # we read the whole chunk, get another
        self._safe_read(2)      # toss the CRLF at the end of the chunk
        pending = None

    self._read_and_discard_trailer()

    # we read everything; close the "file"
    self._close_conn()

    return b''.join(pieces)
Tim Peters230a60c2002-11-09 05:08:07 +0000635
def _readinto_chunked(self, b):
    """Fill buffer b from a chunked body; return the number of bytes read.

    Returns as soon as b is full, recording in self.chunk_left how much
    of the current chunk is still unread (None when the chunk was
    consumed exactly).  When the terminating zero-sized chunk is reached
    the trailer is discarded and the connection is closed.  Raises
    IncompleteRead when a chunk size cannot be parsed.
    """
    assert self.chunked != _UNKNOWN
    pending = self.chunk_left

    copied = 0
    view = memoryview(b)
    while True:
        if pending is None:
            try:
                pending = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(bytes(b[0:copied]))
            if pending == 0:
                break

        room = len(view)
        if room < pending:
            # the buffer ends in the middle of this chunk
            n = self._safe_readinto(view)
            self.chunk_left = pending - n
            return copied + n
        if room == pending:
            # the buffer ends exactly at the chunk boundary
            n = self._safe_readinto(view)
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            self.chunk_left = None
            return copied + n

        # the whole chunk fits and there is still room left
        n = self._safe_readinto(view[0:pending])
        view = view[n:]
        copied += n

        # we read the whole chunk, get another
        self._safe_read(2)      # toss the CRLF at the end of the chunk
        pending = None

    self._read_and_discard_trailer()

    # we read everything; close the "file"
    self._close_conn()

    return copied
676
def _safe_read(self, amt):
    """Read the number of bytes requested, compensating for partial reads.

    Normally, we have a blocking socket, but a read() can be interrupted
    by a signal (resulting in a partial read).

    Note that we cannot distinguish between EOF and an interrupt when zero
    bytes have been read. IncompleteRead() will be raised in this
    situation.

    This function should be used when <amt> bytes "should" be present for
    reading. If the bytes are truly not available (due to EOF), then the
    IncompleteRead exception can be used to detect the problem.
    """
    pieces = []
    remaining = amt
    while remaining > 0:
        # never ask for more than MAXAMOUNT bytes in a single read
        piece = self.fp.read(min(remaining, MAXAMOUNT))
        if not piece:
            raise IncompleteRead(b''.join(pieces), remaining)
        pieces.append(piece)
        remaining -= len(piece)
    return b"".join(pieces)
Greg Steindd6eefb2000-07-18 09:09:48 +0000699
Antoine Pitrou38d96432011-12-06 22:33:57 +0100700 def _safe_readinto(self, b):
701 """Same as _safe_read, but for reading into a buffer."""
702 total_bytes = 0
703 mvb = memoryview(b)
704 while total_bytes < len(b):
705 if MAXAMOUNT < len(mvb):
706 temp_mvb = mvb[0:MAXAMOUNT]
707 n = self.fp.readinto(temp_mvb)
708 else:
709 n = self.fp.readinto(mvb)
710 if not n:
711 raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
712 mvb = mvb[n:]
713 total_bytes += n
714 return total_bytes
715
Antoine Pitroub353c122009-02-11 00:39:14 +0000716 def fileno(self):
717 return self.fp.fileno()
718
Greg Steindd6eefb2000-07-18 09:09:48 +0000719 def getheader(self, name, default=None):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000720 if self.headers is None:
Greg Steindd6eefb2000-07-18 09:09:48 +0000721 raise ResponseNotReady()
Senthil Kumaran9f8dc442010-08-02 11:04:58 +0000722 headers = self.headers.get_all(name) or default
723 if isinstance(headers, str) or not hasattr(headers, '__iter__'):
724 return headers
725 else:
726 return ', '.join(headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000727
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000728 def getheaders(self):
729 """Return list of (header, value) tuples."""
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000730 if self.headers is None:
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000731 raise ResponseNotReady()
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000732 return list(self.headers.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000733
Antoine Pitroub353c122009-02-11 00:39:14 +0000734 # We override IOBase.__iter__ so that it doesn't check for closed-ness
735
736 def __iter__(self):
737 return self
738
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000739 # For compatibility with old-style urllib responses.
740
741 def info(self):
742 return self.headers
743
744 def geturl(self):
745 return self.url
746
747 def getcode(self):
748 return self.status
Greg Stein5e0fa402000-06-26 08:28:01 +0000749
class HTTPConnection:
    """Client side of a single HTTP connection.

    The class-level attributes below are defaults that instances and
    subclasses may override.
    """

    # Protocol version spoken by this class, as a number and as the token
    # placed on the request line.
    _http_vsn_str = 'HTTP/1.1'
    _http_vsn = 11

    # Port used when none is given, and the class instantiated to parse
    # responses.
    default_port = HTTP_PORT
    response_class = HTTPResponse

    # When true, send() opens the connection on demand.
    auto_open = 1
    # Values above zero enable diagnostic printing.
    debuglevel = 0

    # The TCP stack picks the Maximum Segment Size (MSS) per connection and
    # there is no simple, efficient, platform-independent way to query it
    # (getsockopt() with TCP_MAXSEG may work on some systems).  A fixed
    # estimate is used instead; 16KiB is taken as a reasonable upper bound
    # for the MSS.
    mss = 16384
Greg Stein5e0fa402000-06-26 08:28:01 +0000767
Senthil Kumaran052ddb02013-03-18 14:11:41 -0700768 def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
769 source_address=None):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000770 self.timeout = timeout
Gregory P. Smithb4066372010-01-03 03:28:29 +0000771 self.source_address = source_address
Greg Steindd6eefb2000-07-18 09:09:48 +0000772 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000773 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000774 self.__response = None
775 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000776 self._method = None
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000777 self._tunnel_host = None
778 self._tunnel_port = None
Georg Brandlc7c199f2011-01-22 22:06:24 +0000779 self._tunnel_headers = {}
Tim Petersc411dba2002-07-16 21:35:23 +0000780
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400781 (self.host, self.port) = self._get_hostport(host, port)
782
783 # This is stored as an instance variable to allow unit
784 # tests to replace it with a suitable mockup
785 self._create_connection = socket.create_connection
Greg Stein5e0fa402000-06-26 08:28:01 +0000786
Senthil Kumaran47fff872009-12-20 07:10:31 +0000787 def set_tunnel(self, host, port=None, headers=None):
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400788 """Set up host and port for HTTP CONNECT tunnelling.
Senthil Kumaran47fff872009-12-20 07:10:31 +0000789
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400790 In a connection that uses HTTP CONNECT tunneling, the host passed to the
791 constructor is used as a proxy server that relays all communication to
792 the endpoint passed to `set_tunnel`. This done by sending an HTTP
793 CONNECT request to the proxy server when the connection is established.
794
795 This method must be called before the HTML connection has been
796 established.
797
798 The headers argument should be a mapping of extra HTTP headers to send
799 with the CONNECT request.
Senthil Kumaran47fff872009-12-20 07:10:31 +0000800 """
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400801
802 if self.sock:
803 raise RuntimeError("Can't set up tunnel for established connection")
804
Serhiy Storchaka4ac7ed92014-12-12 09:29:15 +0200805 self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
Senthil Kumaran47fff872009-12-20 07:10:31 +0000806 if headers:
807 self._tunnel_headers = headers
808 else:
809 self._tunnel_headers.clear()
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000810
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400811 def _get_hostport(self, host, port):
Greg Steindd6eefb2000-07-18 09:09:48 +0000812 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000813 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000814 j = host.rfind(']') # ipv6 addresses have [...]
815 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000816 try:
817 port = int(host[i+1:])
818 except ValueError:
Łukasz Langaa5a9a9c2011-10-18 21:17:39 +0200819 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
820 port = self.default_port
821 else:
822 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000823 host = host[:i]
824 else:
825 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000826 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000827 host = host[1:-1]
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400828
829 return (host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000830
Jeremy Hylton30f86742000-09-18 22:50:38 +0000831 def set_debuglevel(self, level):
832 self.debuglevel = level
833
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000834 def _tunnel(self):
Serhiy Storchaka4ac7ed92014-12-12 09:29:15 +0200835 connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
836 self._tunnel_port)
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000837 connect_bytes = connect_str.encode("ascii")
838 self.send(connect_bytes)
Georg Brandlc7c199f2011-01-22 22:06:24 +0000839 for header, value in self._tunnel_headers.items():
Senthil Kumaran47fff872009-12-20 07:10:31 +0000840 header_str = "%s: %s\r\n" % (header, value)
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000841 header_bytes = header_str.encode("latin-1")
Senthil Kumaran47fff872009-12-20 07:10:31 +0000842 self.send(header_bytes)
Georg Brandlc7c199f2011-01-22 22:06:24 +0000843 self.send(b'\r\n')
Senthil Kumaran47fff872009-12-20 07:10:31 +0000844
Georg Brandlc7c199f2011-01-22 22:06:24 +0000845 response = self.response_class(self.sock, method=self._method)
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000846 (version, code, message) = response._read_status()
Senthil Kumaran47fff872009-12-20 07:10:31 +0000847
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000848 if code != 200:
849 self.close()
Andrew Svetlov0832af62012-12-18 23:10:48 +0200850 raise OSError("Tunnel connection failed: %d %s" % (code,
851 message.strip()))
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000852 while True:
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000853 line = response.fp.readline(_MAXLINE + 1)
854 if len(line) > _MAXLINE:
855 raise LineTooLong("header line")
Senthil Kumaranb12771a2012-04-23 23:50:07 +0800856 if not line:
857 # for sites which EOF without sending a trailer
858 break
Senthil Kumaran7e70a5c2012-04-29 10:39:49 +0800859 if line in (b'\r\n', b'\n', b''):
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000860 break
861
Greg Steindd6eefb2000-07-18 09:09:48 +0000862 def connect(self):
863 """Connect to the host and port specified in __init__."""
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400864 self.sock = self._create_connection((self.host,self.port),
865 self.timeout, self.source_address)
866
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000867 if self._tunnel_host:
868 self._tunnel()
Greg Stein5e0fa402000-06-26 08:28:01 +0000869
Greg Steindd6eefb2000-07-18 09:09:48 +0000870 def close(self):
871 """Close the connection to the HTTP server."""
872 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000873 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000874 self.sock = None
875 if self.__response:
876 self.__response.close()
877 self.__response = None
878 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000879
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000880 def send(self, data):
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000881 """Send `data' to the server.
882 ``data`` can be a string object, a bytes object, an array object, a
883 file-like object that supports a .read() method, or an iterable object.
884 """
885
Greg Steindd6eefb2000-07-18 09:09:48 +0000886 if self.sock is None:
887 if self.auto_open:
888 self.connect()
889 else:
890 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000891
Jeremy Hylton30f86742000-09-18 22:50:38 +0000892 if self.debuglevel > 0:
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000893 print("send:", repr(data))
Jeremy Hylton636950f2009-03-28 04:34:21 +0000894 blocksize = 8192
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000895 if hasattr(data, "read") :
Jeremy Hylton636950f2009-03-28 04:34:21 +0000896 if self.debuglevel > 0:
897 print("sendIng a read()able")
898 encode = False
Antoine Pitrouead1d622009-09-29 18:44:53 +0000899 try:
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000900 mode = data.mode
Antoine Pitrouead1d622009-09-29 18:44:53 +0000901 except AttributeError:
902 # io.BytesIO and other file-like objects don't have a `mode`
903 # attribute.
904 pass
905 else:
906 if "b" not in mode:
907 encode = True
908 if self.debuglevel > 0:
909 print("encoding file using iso-8859-1")
Jeremy Hylton636950f2009-03-28 04:34:21 +0000910 while 1:
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000911 datablock = data.read(blocksize)
912 if not datablock:
Jeremy Hylton636950f2009-03-28 04:34:21 +0000913 break
914 if encode:
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000915 datablock = datablock.encode("iso-8859-1")
916 self.sock.sendall(datablock)
Andrew Svetlov7b2c8bb2013-04-12 22:49:19 +0300917 return
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000918 try:
Senthil Kumaran5a2da3b2010-10-02 10:33:13 +0000919 self.sock.sendall(data)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000920 except TypeError:
921 if isinstance(data, collections.Iterable):
922 for d in data:
923 self.sock.sendall(d)
924 else:
Georg Brandl25e2cd12011-08-03 08:27:00 +0200925 raise TypeError("data should be a bytes-like object "
926 "or an iterable, got %r" % type(data))
Greg Stein5e0fa402000-06-26 08:28:01 +0000927
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000928 def _output(self, s):
929 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000930
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000931 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000932 """
933 self._buffer.append(s)
934
Benjamin Peterson1742e402008-11-30 22:15:29 +0000935 def _send_output(self, message_body=None):
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000936 """Send the currently buffered request and clear the buffer.
937
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000938 Appends an extra \\r\\n to the buffer.
Benjamin Peterson822b21c2009-01-18 00:04:57 +0000939 A message_body may be specified, to be appended to the request.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000940 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000941 self._buffer.extend((b"", b""))
942 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000943 del self._buffer[:]
Benjamin Peterson1742e402008-11-30 22:15:29 +0000944 # If msg and message_body are sent in a single send() call,
945 # it will avoid performance problems caused by the interaction
Antoine Pitrou90e47742013-01-02 22:10:47 +0100946 # between delayed ack and the Nagle algorithm. However,
947 # there is no performance gain if the message is larger
948 # than MSS (and there is a memory penalty for the message
949 # copy).
950 if isinstance(message_body, bytes) and len(message_body) < self.mss:
Benjamin Peterson1742e402008-11-30 22:15:29 +0000951 msg += message_body
Benjamin Peterson822b21c2009-01-18 00:04:57 +0000952 message_body = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000953 self.send(msg)
Benjamin Peterson822b21c2009-01-18 00:04:57 +0000954 if message_body is not None:
Jeremy Hylton236654b2009-03-27 20:24:34 +0000955 # message_body was not a string (i.e. it is a file), and
956 # we must run the risk of Nagle.
Benjamin Peterson822b21c2009-01-18 00:04:57 +0000957 self.send(message_body)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000958
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000959 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000960 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000961
Greg Steindd6eefb2000-07-18 09:09:48 +0000962 `method' specifies an HTTP request method, e.g. 'GET'.
963 `url' specifies the object being requested, e.g. '/index.html'.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000964 `skip_host' if True does not add automatically a 'Host:' header
965 `skip_accept_encoding' if True does not add automatically an
966 'Accept-Encoding:' header
Greg Steindd6eefb2000-07-18 09:09:48 +0000967 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000968
Greg Stein616a58d2003-06-24 06:35:19 +0000969 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +0000970 if self.__response and self.__response.isclosed():
971 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000972
Tim Peters58eb11c2004-01-18 20:29:55 +0000973
Greg Steindd6eefb2000-07-18 09:09:48 +0000974 # in certain cases, we cannot issue another request on this connection.
975 # this occurs when:
976 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
977 # 2) a response to a previous request has signalled that it is going
978 # to close the connection upon completion.
979 # 3) the headers for the previous response have not been read, thus
980 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
981 #
982 # if there is no prior response, then we can request at will.
983 #
984 # if point (2) is true, then we will have passed the socket to the
985 # response (effectively meaning, "there is no prior response"), and
986 # will open a new one when a new request is made.
987 #
988 # Note: if a prior response exists, then we *can* start a new request.
989 # We are not allowed to begin fetching the response to this new
990 # request, however, until that prior response is complete.
991 #
992 if self.__state == _CS_IDLE:
993 self.__state = _CS_REQ_STARTED
994 else:
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000995 raise CannotSendRequest(self.__state)
Greg Stein5e0fa402000-06-26 08:28:01 +0000996
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000997 # Save the method we use, we need it later in the response phase
998 self._method = method
Greg Steindd6eefb2000-07-18 09:09:48 +0000999 if not url:
1000 url = '/'
Martin v. Löwisdd5a8602007-06-30 09:22:09 +00001001 request = '%s %s %s' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +00001002
Martin v. Löwisdd5a8602007-06-30 09:22:09 +00001003 # Non-ASCII characters should have been eliminated earlier
1004 self._output(request.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +00001005
Greg Steindd6eefb2000-07-18 09:09:48 +00001006 if self._http_vsn == 11:
1007 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +00001008
Jeremy Hylton3921ff62002-03-09 06:07:23 +00001009 if not skip_host:
1010 # this header is issued *only* for HTTP/1.1
1011 # connections. more specifically, this means it is
1012 # only issued when the client uses the new
1013 # HTTPConnection() class. backwards-compat clients
1014 # will be using HTTP/1.0 and those clients may be
1015 # issuing this header themselves. we should NOT issue
1016 # it twice; some web servers (such as Apache) barf
1017 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +00001018
Jeremy Hylton3921ff62002-03-09 06:07:23 +00001019 # If we need a non-standard port,include it in the
1020 # header. If the request is going through a proxy,
1021 # but the host of the actual URL, not the host of the
1022 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +00001023
Jeremy Hylton3921ff62002-03-09 06:07:23 +00001024 netloc = ''
1025 if url.startswith('http'):
1026 nil, netloc, nil, nil, nil = urlsplit(url)
1027
1028 if netloc:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001029 try:
1030 netloc_enc = netloc.encode("ascii")
1031 except UnicodeEncodeError:
1032 netloc_enc = netloc.encode("idna")
1033 self.putheader('Host', netloc_enc)
Jeremy Hylton3921ff62002-03-09 06:07:23 +00001034 else:
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001035 if self._tunnel_host:
1036 host = self._tunnel_host
1037 port = self._tunnel_port
1038 else:
1039 host = self.host
1040 port = self.port
1041
Thomas Wouters477c8d52006-05-27 19:21:47 +00001042 try:
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001043 host_enc = host.encode("ascii")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001044 except UnicodeEncodeError:
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001045 host_enc = host.encode("idna")
Senthil Kumaran74ebd9e2010-11-13 12:27:49 +00001046
1047 # As per RFC 273, IPv6 address should be wrapped with []
1048 # when used as Host header
1049
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001050 if host.find(':') >= 0:
Senthil Kumaran74ebd9e2010-11-13 12:27:49 +00001051 host_enc = b'[' + host_enc + b']'
1052
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001053 if port == self.default_port:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 self.putheader('Host', host_enc)
1055 else:
Guido van Rossum98297ee2007-11-06 21:34:58 +00001056 host_enc = host_enc.decode("ascii")
Senthil Kumaran9da047b2014-04-14 13:07:56 -04001057 self.putheader('Host', "%s:%s" % (host_enc, port))
Greg Stein5e0fa402000-06-26 08:28:01 +00001058
Greg Steindd6eefb2000-07-18 09:09:48 +00001059 # note: we are assuming that clients will not attempt to set these
1060 # headers since *this* library must deal with the
1061 # consequences. this also means that when the supporting
1062 # libraries are updated to recognize other forms, then this
1063 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +00001064
Greg Steindd6eefb2000-07-18 09:09:48 +00001065 # we only want a Content-Encoding of "identity" since we don't
1066 # support encodings such as x-gzip or x-deflate.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +00001067 if not skip_accept_encoding:
1068 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +00001069
Greg Steindd6eefb2000-07-18 09:09:48 +00001070 # we can accept "chunked" Transfer-Encodings, but no others
1071 # NOTE: no TE header implies *only* "chunked"
1072 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +00001073
Greg Steindd6eefb2000-07-18 09:09:48 +00001074 # if TE is supplied in the header, then it must appear in a
1075 # Connection header.
1076 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +00001077
Greg Steindd6eefb2000-07-18 09:09:48 +00001078 else:
1079 # For HTTP/1.0, the server will assume "not chunked"
1080 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001081
Benjamin Petersonf608c612008-11-16 18:33:53 +00001082 def putheader(self, header, *values):
Greg Steindd6eefb2000-07-18 09:09:48 +00001083 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +00001084
Greg Steindd6eefb2000-07-18 09:09:48 +00001085 For example: h.putheader('Accept', 'text/html')
1086 """
1087 if self.__state != _CS_REQ_STARTED:
1088 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +00001089
Guido van Rossum98297ee2007-11-06 21:34:58 +00001090 if hasattr(header, 'encode'):
1091 header = header.encode('ascii')
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +02001092
1093 if not _is_legal_header_name(header):
1094 raise ValueError('Invalid header name %r' % (header,))
1095
Benjamin Petersonf608c612008-11-16 18:33:53 +00001096 values = list(values)
1097 for i, one_value in enumerate(values):
1098 if hasattr(one_value, 'encode'):
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001099 values[i] = one_value.encode('latin-1')
Senthil Kumaran58d5dbf2010-10-03 18:22:42 +00001100 elif isinstance(one_value, int):
1101 values[i] = str(one_value).encode('ascii')
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +02001102
1103 if _is_illegal_header_value(values[i]):
1104 raise ValueError('Invalid header value %r' % (values[i],))
1105
Benjamin Petersonf608c612008-11-16 18:33:53 +00001106 value = b'\r\n\t'.join(values)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001107 header = header + b': ' + value
1108 self._output(header)
Greg Stein5e0fa402000-06-26 08:28:01 +00001109
Benjamin Peterson1742e402008-11-30 22:15:29 +00001110 def endheaders(self, message_body=None):
1111 """Indicate that the last header line has been sent to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +00001112
Senthil Kumaranad87fa62011-10-05 23:26:49 +08001113 This method sends the request to the server. The optional message_body
1114 argument can be used to pass a message body associated with the
1115 request. The message body will be sent in the same packet as the
1116 message headers if it is a string, otherwise it is sent as a separate
1117 packet.
Benjamin Peterson1742e402008-11-30 22:15:29 +00001118 """
Greg Steindd6eefb2000-07-18 09:09:48 +00001119 if self.__state == _CS_REQ_STARTED:
1120 self.__state = _CS_REQ_SENT
1121 else:
1122 raise CannotSendHeader()
Benjamin Peterson1742e402008-11-30 22:15:29 +00001123 self._send_output(message_body)
Greg Stein5e0fa402000-06-26 08:28:01 +00001124
Greg Steindd6eefb2000-07-18 09:09:48 +00001125 def request(self, method, url, body=None, headers={}):
1126 """Send a complete request to the server."""
Jeremy Hylton636950f2009-03-28 04:34:21 +00001127 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +00001128
Benjamin Peterson1742e402008-11-30 22:15:29 +00001129 def _set_content_length(self, body):
1130 # Set the content-length based on the body.
1131 thelen = None
1132 try:
1133 thelen = str(len(body))
1134 except TypeError as te:
1135 # If this is a file-like object, try to
1136 # fstat its file descriptor
Benjamin Peterson1742e402008-11-30 22:15:29 +00001137 try:
1138 thelen = str(os.fstat(body.fileno()).st_size)
1139 except (AttributeError, OSError):
1140 # Don't send a length if this failed
1141 if self.debuglevel > 0: print("Cannot stat!!")
1142
1143 if thelen is not None:
1144 self.putheader('Content-Length', thelen)
1145
Greg Steindd6eefb2000-07-18 09:09:48 +00001146 def _send_request(self, method, url, body, headers):
Jeremy Hylton636950f2009-03-28 04:34:21 +00001147 # Honor explicitly requested Host: and Accept-Encoding: headers.
Jeremy Hylton2c178252004-08-07 16:28:14 +00001148 header_names = dict.fromkeys([k.lower() for k in headers])
1149 skips = {}
1150 if 'host' in header_names:
1151 skips['skip_host'] = 1
1152 if 'accept-encoding' in header_names:
1153 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +00001154
Jeremy Hylton2c178252004-08-07 16:28:14 +00001155 self.putrequest(method, url, **skips)
1156
Senthil Kumaran5fa4a892012-05-19 16:58:09 +08001157 if body is not None and ('content-length' not in header_names):
Benjamin Peterson1742e402008-11-30 22:15:29 +00001158 self._set_content_length(body)
Guido van Rossumcc2b0162007-02-11 06:12:03 +00001159 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +00001160 self.putheader(hdr, value)
Jeremy Hyltonef9f48e2009-03-26 22:04:05 +00001161 if isinstance(body, str):
Jeremy Hylton236654b2009-03-27 20:24:34 +00001162 # RFC 2616 Section 3.7.1 says that text default has a
1163 # default charset of iso-8859-1.
1164 body = body.encode('iso-8859-1')
Jeremy Hyltonef9f48e2009-03-26 22:04:05 +00001165 self.endheaders(body)
Greg Stein5e0fa402000-06-26 08:28:01 +00001166
Greg Steindd6eefb2000-07-18 09:09:48 +00001167 def getresponse(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001168 """Get the response from the server.
1169
1170 If the HTTPConnection is in the correct state, returns an
1171 instance of HTTPResponse or of whatever object is returned by
1172 class the response_class variable.
1173
1174 If a request has not been sent or if a previous response has
1175 not be handled, ResponseNotReady is raised. If the HTTP
1176 response indicates that the connection should be closed, then
1177 it will be closed before the response is returned. When the
1178 connection is closed, the underlying socket is closed.
1179 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001180
Greg Stein616a58d2003-06-24 06:35:19 +00001181 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +00001182 if self.__response and self.__response.isclosed():
1183 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001184
Greg Steindd6eefb2000-07-18 09:09:48 +00001185 # if a prior response exists, then it must be completed (otherwise, we
1186 # cannot read this response's header to determine the connection-close
1187 # behavior)
1188 #
1189 # note: if a prior response existed, but was connection-close, then the
1190 # socket and response were made independent of this HTTPConnection
1191 # object since a new request requires that we open a whole new
1192 # connection
1193 #
1194 # this means the prior response had one of two states:
1195 # 1) will_close: this connection was reset and the prior socket and
1196 # response operate independently
1197 # 2) persistent: the response was retained and we await its
1198 # isclosed() status to become true.
1199 #
1200 if self.__state != _CS_REQ_SENT or self.__response:
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001201 raise ResponseNotReady(self.__state)
Greg Stein5e0fa402000-06-26 08:28:01 +00001202
Jeremy Hylton30f86742000-09-18 22:50:38 +00001203 if self.debuglevel > 0:
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001204 response = self.response_class(self.sock, self.debuglevel,
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +00001205 method=self._method)
Jeremy Hylton30f86742000-09-18 22:50:38 +00001206 else:
Antoine Pitrou988dbd72010-12-17 17:35:56 +00001207 response = self.response_class(self.sock, method=self._method)
Greg Stein5e0fa402000-06-26 08:28:01 +00001208
Serhiy Storchakab491e052014-12-01 13:07:45 +02001209 try:
1210 response.begin()
1211 assert response.will_close != _UNKNOWN
1212 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +00001213
Serhiy Storchakab491e052014-12-01 13:07:45 +02001214 if response.will_close:
1215 # this effectively passes the connection to the response
1216 self.close()
1217 else:
1218 # remember this, so we can tell when it is complete
1219 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +00001220
Serhiy Storchakab491e052014-12-01 13:07:45 +02001221 return response
1222 except:
1223 response.close()
1224 raise
Greg Stein5e0fa402000-06-26 08:28:01 +00001225
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None):
            """Store the TLS settings on top of the plain connection setup.

            Without an explicit ``context`` a default HTTPS context is
            created.  ``check_hostname`` defaults to the context's own
            setting; asking for hostname checking on a context that does not
            verify certificates raises ValueError.  A given key/cert file is
            loaded into the context.
            """
            super().__init__(host, port, timeout, source_address)
            self.key_file, self.cert_file = key_file, cert_file

            if context is None:
                context = ssl._create_default_https_context()
            will_verify = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                check_hostname = context.check_hostname
            if check_hostname and not will_verify:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)

            self._context, self._check_hostname = context, check_hostname

        def connect(self):
            "Connect to a host on a given (SSL) port."

            super().connect()

            # When tunnelling, the certificate belongs to the tunnel target,
            # not to the proxy this socket is connected to.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

            # Match the hostname here only when it was requested and the
            # context is not already configured to check it.
            if self._check_hostname and not self._context.check_hostname:
                try:
                    ssl.match_hostname(self.sock.getpeercert(),
                                       server_hostname)
                except Exception:
                    self.sock.shutdown(socket.SHUT_RDWR)
                    self.sock.close()
                    raise

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001280
class HTTPException(Exception):
    """Base class for the exceptions defined by this module.

    Subclasses that define an __init__ must call Exception.__init__ or set
    self.args themselves; otherwise str() will fail.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +00001285
class NotConnected(HTTPException):
    """HTTPException subclass that adds no behaviour of its own."""
    # NOTE(review): presumably raised when the connection is used while it
    # is not connected; the raise sites are outside this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001288
class InvalidURL(HTTPException):
    """HTTPException subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals a URL (or part of one) that could
    # not be accepted; the raise sites are outside this section -- confirm.
1291
class UnknownProtocol(HTTPException):
    """Exception that records the version value it was constructed with.

    The value is available both as the ``version`` attribute and as the
    single element of ``args``.
    """

    def __init__(self, version):
        # The base initializer stores its positional arguments as args,
        # i.e. args == (version,).
        HTTPException.__init__(self, version)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +00001296
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals a transfer encoding this module
    # does not handle; the raise sites are outside this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001299
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals a file mode that is not supported;
    # the raise sites are outside this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001302
class IncompleteRead(HTTPException):
    """Exception carrying the data obtained before a read fell short.

    Attributes:
        partial: the data that was read; it is also the sole element of
            ``args``.
        expected: the amount still expected, or None when not supplied.

    repr() and str() both render as
    ``<ClassName>(<len(partial)> bytes read[, <expected> more expected])``.
    """

    def __init__(self, partial, expected=None):
        # Only `partial` is placed in args; `expected` lives solely as an
        # attribute.
        self.args = partial,
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is not None:
            e = ', %i more expected' % self.expected
        else:
            e = ''
        # Use the runtime class name so that subclasses are not reported
        # as "IncompleteRead".
        return '%s(%i bytes read%s)' % (type(self).__name__,
                                        len(self.partial), e)

    def __str__(self):
        return repr(self)
Greg Stein5e0fa402000-06-26 08:28:01 +00001316
class ImproperConnectionState(HTTPException):
    """HTTPException subclass that adds no behaviour of its own.

    Serves as the common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady.
    """
    # NOTE(review): presumably tied to the connection state machine
    # described in the module docstring; raise sites are not visible here
    # -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001319
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals that a request could not be
    # started in the current connection state; raise sites are outside
    # this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001322
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals that a header could not be sent
    # in the current connection state; raise sites are outside this
    # section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001325
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no behaviour of its own."""
    # NOTE(review): by its name, signals that a response was requested
    # before one could be retrieved; raise sites are outside this section
    # -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001328
class BadStatusLine(HTTPException):
    """Exception that stores the line it was constructed with.

    A falsy ``line`` (for example an empty string) is replaced by its
    repr() first, so ``line`` and ``args[0]`` never hold an empty value.
    """

    def __init__(self, line):
        # Keep truthy values as given; substitute repr() for falsy ones.
        shown = line or repr(line)
        self.line = shown
        self.args = (shown,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001335
class LineTooLong(HTTPException):
    """Exception whose message says more than _MAXLINE bytes were received.

    ``line_type`` is interpolated into the message to name what was being
    read.
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1340
# Backwards-compatibility alias: `error` is bound to the very same class
# object as HTTPException, so `except error:` and `except HTTPException:`
# are interchangeable.
error = HTTPException