blob: a6ab135b2c3879b588bc24f8fa4c267180d51348 [file] [log] [blame]
R David Murray44b548d2016-09-08 13:59:53 -04001r"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006HTTPConnection goes through a number of "states", which define when a client
Greg Stein5e0fa402000-06-26 08:28:01 +00007may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
R David Murraycae7bdb2015-04-05 19:26:29 -040023 |\_____________________________
24 | | getresponse() raises
25 | response = getresponse() | ConnectionError
26 v v
27 Unread-response Idle
28 [Response-headers-read]
Greg Stein5e0fa402000-06-26 08:28:01 +000029 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000030 | |
31 | response.read() | putrequest()
32 v v
33 Idle Req-started-unread-response
34 ______/|
35 / |
36 response.read() | | ( putheader() )* endheaders()
37 v v
38 Request-started Req-sent-unread-response
39 |
40 | response.read()
41 v
42 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000043
44This diagram presents the following rules:
45 -- a second request may not be started until {response-headers-read}
46 -- a response [object] cannot be retrieved until {request-sent}
47 -- there is no differentiation between an unread response body and a
48 partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51 HTTPResponse class does not enforce this state machine, which
52 implies sophisticated clients may accelerate the request/response
53 pipeline. Caution should be taken, though: accelerating the states
54 beyond the above pattern may imply knowledge of the server's
55 connection-close behavior for certain requests. For example, it
56 is impossible to tell whether the server will close the connection
57 UNTIL the response headers have been read; this means that further
58 requests cannot be placed into the pipeline until it is known that
59 the server will NOT be closing the connection.
60
61Logical State __state __response
62------------- ------- ----------
63Idle _CS_IDLE None
64Request-started _CS_REQ_STARTED None
65Request-sent _CS_REQ_SENT None
66Unread-response _CS_IDLE <response_class>
67Req-started-unread-response _CS_REQ_STARTED <response_class>
68Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000069"""
Guido van Rossum23acc951994-02-21 16:36:04 +000070
Barry Warsaw820c1202008-06-12 04:06:45 +000071import email.parser
72import email.message
Miss Islington (bot)4c35a2a2021-10-06 11:29:23 -070073import errno
Serhiy Storchakae4db7692014-12-23 16:28:28 +020074import http
Jeremy Hylton636950f2009-03-28 04:34:21 +000075import io
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +020076import re
Jeremy Hylton636950f2009-03-28 04:34:21 +000077import socket
Saiyang Gou927b8412021-04-23 03:19:08 -070078import sys
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030079import collections.abc
Jeremy Hylton1afc1692008-06-18 20:49:58 +000080from urllib.parse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000081
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Default TCP ports for the two URL schemes.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until begin() has parsed
# the status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (injects every http.HTTPStatus member into this module's namespace, which
# is where bare names such as CONTINUE and NO_CONTENT used below come from)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of header lines accepted by _read_headers().
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# A legal name (bytes) starts with anything but ':' or whitespace and then
# contains no ':' , CR or LF.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# An illegal value (bytes) contains an LF not followed by SP/HTAB, or a CR
# not followed by SP/HTAB/LF.
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
159
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000160
def _encode(data, name='data'):
    """Return data encoded as Latin-1, with a friendlier failure message.

    *name* labels the value in the error text.  On failure a new
    UnicodeEncodeError is raised that carries the original encoding, object
    and offsets but a reason explaining how to send the value as UTF-8.
    """
    try:
        encoded = data.encode("latin-1")
    except UnicodeEncodeError as err:
        offending = data[err.start:err.end]
        reason = (
            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
            "if you want to send it encoded in UTF-8." %
            (name.title(), offending, name))
        # "from None" hides the low-level codec error from the traceback.
        raise UnicodeEncodeError(
            err.encoding, err.object, err.start, err.end, reason) from None
    else:
        return encoded
174
175
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold parsed HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Return a list with one "Name: value" string per occurrence of the
        header, in message order.  Any folded continuation text stays
        embedded in the value exactly as the message stores it.  If the
        header does not occur, an empty list is returned.  Case is not
        important in the header name.

        """
        # The previous implementation walked self.keys() and compared each
        # key against "<name>:".  keys() yields bare field names, which never
        # carry the trailing colon, so nothing could ever match and the
        # result was always [].  Match on the field name itself and rebuild
        # the header line from the (name, value) pair instead.
        wanted = name.lower()
        return ['%s: %s' % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000205
def _read_headers(fp):
    """Collect raw header lines from *fp* up to and including the terminator.

    Reading stops at the first blank line (CRLF or LF) or at EOF; that final
    line is part of the returned list.  A line longer than _MAXLINE raises
    LineTooLong, and more than _MAXHEADERS lines raises HTTPException.
    """
    end_markers = (b'\r\n', b'\n', b'')
    collected = []
    while True:
        # Ask for one byte more than allowed so an over-long line is detectable.
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        collected.append(raw)
        if len(collected) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in end_markers:
            return collected
223
def parse_headers(fp, _class=HTTPMessage):
    """Parse RFC2822-style headers from the binary file object *fp*.

    The email parser works on str, but wrapping the stream in a
    TextIOWrapper would buffer past the headers and swallow bytes that must
    later be read as bytes.  So exactly the header lines are read here as
    bytes, decoded as iso-8859-1, and handed to the parser as one string.
    The result is an instance of *_class*.
    """
    raw_lines = _read_headers(fp)
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)
Greg Stein5e0fa402000-06-26 08:28:01 +0000237
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000238
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000239class HTTPResponse(io.BufferedIOBase):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000240
241 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
242
Jeremy Hylton811fc142007-08-03 13:30:02 +0000243 # The bytes from the socket object are iso-8859-1 strings.
244 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
245 # text following RFC 2047. The basic status line parsing only
246 # accepts iso-8859-1.
247
def __init__(self, sock, debuglevel=0, method=None, url=None):
    """Wrap *sock* in a binary file object and reset all parsed state.

    *method* is remembered as self._method; *url* is accepted but not used
    here.  Every status/framing attribute starts as _UNKNOWN.
    """
    # If the response includes a content-length header, we need to
    # make sure that the client doesn't read more than the
    # specified number of bytes.  If it does, it will block until
    # the server times out and closes the connection.  This will
    # happen if a self.fp.read() is done (without a size) whether
    # self.fp is buffered or not.  So, no self.fp.read() by
    # clients unless they know what they are doing.
    self.fp = sock.makefile("rb")
    self.debuglevel = debuglevel
    self._method = method

    # The HTTPResponse object is returned via urllib.  The clients
    # of http and urllib expect different attributes for the
    # headers.  headers is used here and supports urllib.  msg is
    # provided as a backwards compatibility layer for http
    # clients.
    self.headers = self.msg = None

    # from the Status-Line of the response:
    # HTTP-Version, Status-Code, Reason-Phrase
    self.version = self.status = self.reason = _UNKNOWN

    # is "chunked" being used? / bytes left to read in current chunk
    self.chunked = self.chunk_left = _UNKNOWN
    # number of bytes left in response / conn will close at end of response
    self.length = self.will_close = _UNKNOWN
Greg Stein5e0fa402000-06-26 08:28:01 +0000277
def _read_status(self):
    """Read and parse the status line; return (version, status, reason).

    *status* is returned as an int.  Raises LineTooLong for an over-long
    line, RemoteDisconnected when nothing at all was received, and
    BadStatusLine when the line does not start with "HTTP/" or the status
    code is not an integer in 100..999.
    """
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    if len(line) > _MAXLINE:
        raise LineTooLong("status line")
    if self.debuglevel > 0:
        print("reply:", repr(line))
    if not line:
        # Presumably, the server closed the connection before
        # sending a valid response.
        raise RemoteDisconnected("Remote end closed connection without"
                                 " response")

    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        # No reason phrase present.
        version, status = line.split(None, 1)
        reason = ""
    else:
        # empty version will cause next test to fail.
        version = ""

    if not version.startswith("HTTP/"):
        self._close_conn()
        raise BadStatusLine(line)

    # The status code is a three-digit number
    try:
        code = int(status)
    except ValueError:
        raise BadStatusLine(line)
    if not 100 <= code <= 999:
        raise BadStatusLine(line)
    return version, code, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000310
def begin(self):
    """Read the status line and headers and work out how the body is framed.

    Does nothing if the headers were already read.  On return self.status,
    self.reason, self.version, self.headers/self.msg, self.chunked,
    self.length and self.will_close are populated.  Raises UnknownProtocol
    for a version string that is not HTTP/0.9 or HTTP/1.x.
    """
    if self.headers is not None:
        # we've already started reading the response
        return

    # read until we get a non-100 response
    version, status, reason = self._read_status()
    while status == CONTINUE:
        # skip the header from the 100 response
        skipped_headers = _read_headers(self.fp)
        if self.debuglevel > 0:
            print("headers:", skipped_headers)
        del skipped_headers
        version, status, reason = self._read_status()

    self.code = self.status = status
    self.reason = reason.strip()
    if version in ("HTTP/1.0", "HTTP/0.9"):
        # Some servers might still return "0.9", treat it as 1.0 anyway
        self.version = 10
    elif version.startswith("HTTP/1."):
        # use HTTP/1.1 code for HTTP/1.x where x>=1
        self.version = 11
    else:
        raise UnknownProtocol(version)

    self.headers = self.msg = parse_headers(self.fp)

    if self.debuglevel > 0:
        for hdr, val in self.headers.items():
            print("header:", hdr + ":", val)

    # are we using the chunked-style of transfer encoding?
    tr_enc = self.headers.get("transfer-encoding")
    self.chunked = bool(tr_enc) and tr_enc.lower() == "chunked"
    if self.chunked:
        self.chunk_left = None

    # will the connection close at the end of the response?
    self.will_close = self._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    self.length = None
    declared = self.headers.get("content-length")
    if declared and not self.chunked:
        try:
            parsed = int(declared)
        except ValueError:
            parsed = None
        # ignore unparsable and nonsensical negative lengths
        if parsed is not None and parsed >= 0:
            self.length = parsed

    # does the body have a fixed length? (of zero)
    bodiless = (status in (NO_CONTENT, NOT_MODIFIED) or
                100 <= status < 200 or      # 1xx codes
                self._method == "HEAD")
    if bodiless:
        self.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if self.length is None and not (self.will_close or self.chunked):
        self.will_close = True
Greg Stein5e0fa402000-06-26 08:28:01 +0000382
def _check_close(self):
    """Return True if the headers say the connection closes after this response."""
    conn = self.headers.get("connection")
    if self.version == 11:
        # An HTTP/1.1 proxy is assumed to stay open unless
        # explicitly closed.
        return bool(conn and "close" in conn.lower())

    # Some HTTP/1.0 implementations have support for persistent
    # connections, using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.headers.get("keep-alive"):
        return False

    # At least Akamai returns a "Connection: Keep-Alive" header,
    # which was supposed to be sent by the client.
    if conn and "keep-alive" in conn.lower():
        return False

    # Proxy-Connection is a netscape hack.
    pconn = self.headers.get("proxy-connection")
    # otherwise, assume it will close
    return not (pconn and "keep-alive" in pconn.lower())
411
def _close_conn(self):
    """Detach self.fp (set it to None) and close the detached file object."""
    # Clear the attribute before closing so self.fp is already None even
    # if close() raises.
    detached, self.fp = self.fp, None
    detached.close()
416
def close(self):
    """Mark the response closed and release the underlying file object."""
    try:
        super().close() # set "closed" flag
    finally:
        # Runs even if the base-class close() raised; skipped when self.fp
        # was already released (it is then None).
        if self.fp:
            self._close_conn()
Greg Stein5e0fa402000-06-26 08:28:01 +0000423
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000424 # These implementations are for the benefit of io.BufferedReader.
425
426 # XXX This class should probably be revised to act more like
427 # the "raw stream" that BufferedReader expects.
428
def flush(self):
    """Flush the base stream and, if still attached, the underlying file."""
    super().flush()
    # self.fp is None once the connection has been released.
    if self.fp:
        self.fp.flush()
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000433
def readable(self):
    """Report that this stream supports reading; the answer is always True."""
    return True
437
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000438 # End of "raw stream" methods
439
def isclosed(self):
    """Return True once the underlying file object has been released."""
    # NOTE: it is possible that we will not ever call self.close(). This
    #       case occurs when will_close is TRUE, length is None, and we
    #       read up to the last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    return self.fp is None
449
def read(self, amt=None):
    """Read and return up to *amt* bytes of the body, or all of it if None.

    Returns b"" once the connection has been released or for a HEAD
    request.  Chunked responses are delegated to _read_chunked().  The
    connection is released as soon as the known length is exhausted or the
    stream reports EOF.
    """
    if self.fp is None:
        return b""

    if self._method == "HEAD":
        self._close_conn()
        return b""

    if self.chunked:
        return self._read_chunked(amt)

    if amt is None:
        # Amount is not given (unbounded read) so we must check self.length
        if self.length is None:
            data = self.fp.read()
        else:
            try:
                data = self._safe_read(self.length)
            except IncompleteRead:
                self._close_conn()
                raise
            self.length = 0
        self._close_conn()        # we read everything
        return data

    remaining = self.length
    if remaining is not None and amt > remaining:
        # clip the read to the "end of response"
        amt = remaining
    data = self.fp.read(amt)
    if amt and not data:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(data)
        if not self.length:
            self._close_conn()
    return data
488
def readinto(self, b):
    """Fill buffer *b* with body bytes and return how many were stored.

    Returns 0 once the connection has been released or for a HEAD request.
    Chunked responses are delegated to _readinto_chunked().
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    remaining = self.length
    if remaining is not None and len(b) > remaining:
        # clip the read to the "end of response"
        b = memoryview(b)[0:remaining]

    # we do not use _safe_read() here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks)
    count = self.fp.readinto(b)
    if b and not count:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
Greg Steindd6eefb2000-07-18 09:09:48 +0000522
def _read_next_chunk_size(self):
    """Read one chunk-size line and return the size as an int.

    Raises LineTooLong for an over-long line.  If the size is not valid
    hexadecimal the connection is released and the ValueError propagates.
    """
    # Read the next chunk size from the file
    raw = self.fp.readline(_MAXLINE + 1)
    if len(raw) > _MAXLINE:
        raise LineTooLong("chunk size")
    # strip chunk-extensions: keep only what precedes the first ";"
    size_field = raw.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # close the connection as protocol synchronisation is
        # probably lost
        self._close_conn()
        raise
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000538
def _read_and_discard_trailer(self):
    """Consume trailer lines up to the blank-line terminator or EOF.

    Raises LineTooLong if a trailer line exceeds _MAXLINE.
    """
    # read and discard trailer up to the CRLF terminator
    ### note: we shouldn't have any trailers!
    # b'' is EOF: a vanishingly small number of sites EOF without
    # sending the trailer
    terminators = (b'\r\n', b'\n', b'')
    while True:
        raw = self.fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("trailer line")
        if raw in terminators:
            return
552
def _get_chunk_left(self):
    """Return the bytes left in the current chunk, starting a new one if needed.

    self.chunk_left == 0 means the current chunk is finished and its
    trailing CRLF is still unread; None means there is no current chunk.
    Returns a non-zero count, or None once the last chunk has been read (the
    trailer is then discarded and the connection released).  A malformed
    chunk size is reported as IncompleteRead(b'').
    """
    remaining = self.chunk_left
    if remaining:
        # Still inside a chunk; nothing to read from the wire.
        return remaining

    if remaining is not None:
        # We are at the end of chunk, discard chunk end
        self._safe_read(2)  # toss the CRLF at the end of the chunk
    try:
        remaining = self._read_next_chunk_size()
    except ValueError:
        raise IncompleteRead(b'')
    if remaining == 0:
        # last chunk: 1*("0") [ chunk-extension ] CRLF
        self._read_and_discard_trailer()
        # we read everything; close the "file"
        self._close_conn()
        remaining = None
    self.chunk_left = remaining
    return remaining
576
def _read_chunked(self, amt=None):
    """Read up to *amt* bytes (all if None) from a chunked body.

    If the stream ends early, IncompleteRead is raised carrying whatever
    data had been collected so far.
    """
    assert self.chunked != _UNKNOWN
    pieces = []
    try:
        # _get_chunk_left() returns None once the final chunk is consumed.
        for available in iter(self._get_chunk_left, None):
            if amt is not None and amt <= available:
                # The request is satisfied inside the current chunk.
                pieces.append(self._safe_read(amt))
                self.chunk_left = available - amt
                break

            pieces.append(self._safe_read(available))
            if amt is not None:
                amt -= available
            # Chunk fully consumed; its trailing CRLF is still pending.
            self.chunk_left = 0
    except IncompleteRead:
        raise IncompleteRead(b''.join(pieces))
    return b''.join(pieces)
Tim Peters230a60c2002-11-09 05:08:07 +0000598
def _readinto_chunked(self, b):
    """Fill buffer *b* from a chunked body; return the number of bytes stored.

    If the stream ends early, IncompleteRead is raised carrying the bytes
    counted in the running total at that point.
    """
    assert self.chunked != _UNKNOWN
    stored = 0
    window = memoryview(b)
    try:
        # _get_chunk_left() returns None once the final chunk is consumed.
        for available in iter(self._get_chunk_left, None):
            if len(window) <= available:
                # The rest of the buffer fits inside the current chunk.
                got = self._safe_readinto(window)
                self.chunk_left = available - got
                return stored + got

            got = self._safe_readinto(window[:available])
            window = window[got:]
            stored += got
            # Chunk fully consumed; its trailing CRLF is still pending.
            self.chunk_left = 0
        return stored
    except IncompleteRead:
        raise IncompleteRead(bytes(b[0:stored]))
Antoine Pitrou38d96432011-12-06 22:33:57 +0100622
def _safe_read(self, amt):
    """Read exactly *amt* bytes from self.fp and return them.

    Use this when *amt* bytes "should" be present.  If fewer arrive,
    IncompleteRead is raised with the partial data and the number of bytes
    still missing.
    """
    chunk = self.fp.read(amt)
    missing = amt - len(chunk)
    if missing > 0:
        raise IncompleteRead(chunk, missing)
    return chunk
Greg Steindd6eefb2000-07-18 09:09:48 +0000634
Antoine Pitrou38d96432011-12-06 22:33:57 +0100635 def _safe_readinto(self, b):
636 """Same as _safe_read, but for reading into a buffer."""
Inada Naokid6bf6f22019-04-06 18:06:19 +0900637 amt = len(b)
638 n = self.fp.readinto(b)
639 if n < amt:
640 raise IncompleteRead(bytes(b[:n]), amt-n)
641 return n
Antoine Pitrou38d96432011-12-06 22:33:57 +0100642
def read1(self, n=-1):
    """Read with at most one underlying system call.

    If at least one byte is buffered, return that instead.  Returns b""
    when there is no file object or the request method was HEAD.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        return self._read1_chunked(n)
    limit = self.length
    if limit is not None and not (0 <= n <= limit):
        # Never ask for more than the remaining known body length.
        n = limit
    data = self.fp.read1(n)
    if n and not data:
        # A non-empty request came back empty: treat as end of stream.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(data)
    return data
659
def peek(self, n=-1):
    """Return buffered data without consuming it.

    Having this enables IOBase.readline() to read more than one byte at
    a time.  Returns b"" when there is no file object or the request
    method was HEAD.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    peeker = self._peek_chunked if self.chunked else self.fp.peek
    return peeker(n)
668
def readline(self, limit=-1):
    """Read one line of the body, never exceeding the known body length.

    Returns b"" when there is no file object or the request method was
    HEAD.  Chunked bodies are delegated to the inherited readline().
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        # Fallback to IOBase readline which uses peek() and read()
        return super().readline(limit)
    remaining = self.length
    if remaining is not None and not (0 <= limit <= remaining):
        limit = remaining
    line = self.fp.readline(limit)
    if limit and not line:
        # A non-empty request came back empty: treat as end of stream.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(line)
    return line
683
684 def _read1_chunked(self, n):
685 # Strictly speaking, _get_chunk_left() may cause more than one read,
686 # but that is ok, since that is to satisfy the chunked protocol.
687 chunk_left = self._get_chunk_left()
688 if chunk_left is None or n == 0:
689 return b''
690 if not (0 <= n <= chunk_left):
691 n = chunk_left # if n is negative or larger than chunk_left
692 read = self.fp.read1(n)
693 self.chunk_left -= len(read)
694 if not read:
695 raise IncompleteRead(b"")
696 return read
697
698 def _peek_chunked(self, n):
699 # Strictly speaking, _get_chunk_left() may cause more than one read,
700 # but that is ok, since that is to satisfy the chunked protocol.
701 try:
702 chunk_left = self._get_chunk_left()
703 except IncompleteRead:
704 return b'' # peek doesn't worry about protocol
705 if chunk_left is None:
706 return b'' # eof
707 # peek is allowed to return more than requested. Just request the
708 # entire chunk, and truncate what we get.
709 return self.fp.peek(chunk_left)[:chunk_left]
710
def fileno(self):
    """Return whatever self.fp.fileno() returns."""
    stream = self.fp
    return stream.fileno()
713
def getheader(self, name, default=None):
    '''Return the value of the header matching *name*.

    If there are multiple matching headers, the values are
    combined into a single string separated by commas and spaces.

    If no matching header is found, *default* is used instead (None
    when not specified); an iterable, non-string *default* is joined
    the same way as header values.

    If the headers are unknown, raises http.client.ResponseNotReady.

    '''
    if self.headers is None:
        raise ResponseNotReady()
    found = self.headers.get_all(name) or default
    joinable = hasattr(found, '__iter__') and not isinstance(found, str)
    return ', '.join(found) if joinable else found
Greg Stein5e0fa402000-06-26 08:28:01 +0000733
def getheaders(self):
    """Return list of (header, value) tuples.

    Raises ResponseNotReady if the headers are not available yet.
    """
    message = self.headers
    if message is None:
        raise ResponseNotReady()
    return list(message.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000739
Antoine Pitroub353c122009-02-11 00:39:14 +0000740 # We override IOBase.__iter__ so that it doesn't check for closed-ness
741
742 def __iter__(self):
743 return self
744
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000745 # For compatibility with old-style urllib responses.
746
def info(self):
    '''Return the headers of this response.

    This is the same object as ``self.headers``; it is provided for
    compatibility with old-style urllib responses.

    Note: older versions of this docstring described the result as a
    ``mimetools.Message``.  That module does not exist in Python 3;
    the object returned here is whatever ``self.headers`` holds
    (presumably an ``email.message.Message``-style object, since other
    methods of this class call ``get_all()`` and ``items()`` on it),
    or None if no headers have been stored yet.

    '''
    return self.headers
769
def geturl(self):
    '''Return the real URL of the page (the value of ``self.url``).

    In some cases, the HTTP server redirects a client to another
    URL. The urlopen() function handles this transparently, but in
    some cases the caller needs to know which URL the client was
    redirected to. The geturl() method can be used to get at this
    redirected URL.

    '''
    real_url = self.url
    return real_url
781
def getcode(self):
    '''Return the HTTP status code that was sent with the response
    (the value of ``self.status``), or None if the URL is not an
    HTTP URL.

    '''
    status_code = self.status
    return status_code
Greg Stein5e0fa402000-06-26 08:28:01 +0000788
789class HTTPConnection:
790
Greg Steindd6eefb2000-07-18 09:09:48 +0000791 _http_vsn = 11
792 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000793
Greg Steindd6eefb2000-07-18 09:09:48 +0000794 response_class = HTTPResponse
795 default_port = HTTP_PORT
796 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000797 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000798
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000799 @staticmethod
800 def _is_textIO(stream):
801 """Test whether a file-like object is a text or a binary stream.
802 """
803 return isinstance(stream, io.TextIOBase)
804
805 @staticmethod
806 def _get_content_length(body, method):
807 """Get the content-length based on the body.
808
Martin Panteref91bb22016-08-27 01:39:26 +0000809 If the body is None, we set Content-Length: 0 for methods that expect
810 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
811 any method if the body is a str or bytes-like object and not a file.
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000812 """
Martin Panteref91bb22016-08-27 01:39:26 +0000813 if body is None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000814 # do an explicit check for not None here to distinguish
815 # between unset and set but empty
Martin Panteref91bb22016-08-27 01:39:26 +0000816 if method.upper() in _METHODS_EXPECTING_BODY:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000817 return 0
818 else:
819 return None
820
821 if hasattr(body, 'read'):
822 # file-like object.
Martin Panteref91bb22016-08-27 01:39:26 +0000823 return None
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000824
825 try:
826 # does it implement the buffer protocol (bytes, bytearray, array)?
827 mv = memoryview(body)
828 return mv.nbytes
829 except TypeError:
830 pass
831
832 if isinstance(body, str):
833 return len(body)
834
835 return None
836
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None, blocksize=8192):
    """Store the connection settings; no socket is opened here.

    *host* and *port* are normalized through _get_hostport() and the
    resulting host is checked by _validate_host().
    """
    # Caller-supplied settings.
    self.timeout = timeout
    self.source_address = source_address
    self.blocksize = blocksize

    # Connection / request state.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # CONNECT tunnel configuration (see set_tunnel()).
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    parsed_host, parsed_port = self._get_hostport(host, port)
    self.host = parsed_host
    self.port = parsed_port

    self._validate_host(self.host)

    # This is stored as an instance variable to allow unit
    # tests to replace it with a suitable mockup
    self._create_connection = socket.create_connection
Greg Stein5e0fa402000-06-26 08:28:01 +0000858
def set_tunnel(self, host, port=None, headers=None):
    """Set up host and port for HTTP CONNECT tunnelling.

    In a connection that uses HTTP CONNECT tunneling, the host passed to the
    constructor is used as a proxy server that relays all communication to
    the endpoint passed to `set_tunnel`. This is done by sending an HTTP
    CONNECT request to the proxy server when the connection is established.

    This method must be called before the HTTP connection has been
    established; RuntimeError is raised if a socket already exists.

    The headers argument should be a mapping of extra HTTP headers to send
    with the CONNECT request.  The mapping is copied, so the caller's
    object is never modified by this connection.
    """

    if self.sock:
        raise RuntimeError("Can't set up tunnel for established connection")

    self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
    if headers:
        # Keep a private copy: storing the caller's mapping by reference
        # would let a later set_tunnel() call without headers clear() the
        # caller's own dict.
        self._tunnel_headers = dict(headers)
    else:
        self._tunnel_headers.clear()
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000882
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400883 def _get_hostport(self, host, port):
Greg Steindd6eefb2000-07-18 09:09:48 +0000884 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000885 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000886 j = host.rfind(']') # ipv6 addresses have [...]
887 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000888 try:
889 port = int(host[i+1:])
890 except ValueError:
Łukasz Langaa5a9a9c2011-10-18 21:17:39 +0200891 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
892 port = self.default_port
893 else:
894 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000895 host = host[:i]
896 else:
897 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000898 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000899 host = host[1:-1]
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400900
901 return (host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000902
def set_debuglevel(self, level):
    """Set the debug level; methods of this class print debugging
    output when it is greater than 0.
    """
    self.debuglevel = level
905
def _tunnel(self):
    """Send the CONNECT request for the configured tunnel and consume
    the proxy's reply headers.

    Closes the connection and raises OSError when the proxy's status is
    not 200 OK; raises LineTooLong for an over-long reply header line.
    """
    request_lines = [b"CONNECT %s:%d HTTP/1.0\r\n" % (
        self._tunnel_host.encode("ascii"), self._tunnel_port)]
    request_lines.extend(
        f"{header}: {value}\r\n".encode("latin-1")
        for header, value in self._tunnel_headers.items())
    request_lines.append(b"\r\n")
    # Making a single send() call instead of one per line encourages
    # the host OS to use a more optimal packet size instead of
    # potentially emitting a series of small packets.
    self.send(b"".join(request_lines))
    del request_lines

    response = self.response_class(self.sock, method=self._method)
    (version, code, message) = response._read_status()

    if code != http.HTTPStatus.OK:
        self.close()
        raise OSError(f"Tunnel connection failed: {code} {message.strip()}")

    while True:
        line = response.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        # b'' is EOF (for sites which EOF without sending a trailer);
        # a bare line terminator marks the end of the reply headers.
        if line in (b'', b'\r\n', b'\n'):
            break

        if self.debuglevel > 0:
            print('header:', line.decode())
937
def connect(self):
    """Connect to the host and port specified in __init__."""
    sys.audit("http.client.connect", self, self.host, self.port)
    address = (self.host, self.port)
    self.sock = self._create_connection(
        address, self.timeout, self.source_address)
    # Might fail in OSs that don't implement TCP_NODELAY
    try:
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    except OSError as exc:
        # Only "protocol option not available" is tolerated.
        if exc.errno != errno.ENOPROTOOPT:
            raise

    if self._tunnel_host:
        self._tunnel()
Greg Stein5e0fa402000-06-26 08:28:01 +0000952
def close(self):
    """Close the connection to the HTTP server."""
    self.__state = _CS_IDLE
    try:
        connection_socket = self.sock
        if connection_socket:
            # Forget the socket first, then close it manually... there
            # may be other refs.
            self.sock = None
            connection_socket.close()
    finally:
        # Runs even when closing the socket raised.
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000966
def send(self, data):
    """Send ``data`` to the server.

    ``data`` can be a bytes-like object, a file-like object that supports
    a .read() method, or an iterable whose items are each passed to the
    socket's sendall().  File-like objects are read through
    _read_readable(), which reads in blocks of ``self.blocksize`` and
    encodes text streams as ISO-8859-1.

    Connects first when no socket exists and ``auto_open`` is true;
    otherwise raises NotConnected.  Raises TypeError when ``data`` is
    neither accepted by sendall() nor iterable.
    """

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    if self.debuglevel > 0:
        print("send:", repr(data))
    if hasattr(data, "read"):
        # Reuse the shared block reader instead of duplicating its
        # read/encode loop here; each block is audited, then sent.
        for datablock in self._read_readable(data):
            sys.audit("http.client.send", self, datablock)
            self.sock.sendall(datablock)
        return
    sys.audit("http.client.send", self, data)
    try:
        self.sock.sendall(data)
    except TypeError:
        # Not accepted by sendall() as a whole: fall back to sending it
        # item by item if it is iterable.
        if isinstance(data, collections.abc.Iterable):
            for d in data:
                self.sock.sendall(d)
        else:
            raise TypeError("data should be a bytes-like object "
                            "or an iterable, got %r" % type(data))
Greg Stein5e0fa402000-06-26 08:28:01 +00001006
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001007 def _output(self, s):
1008 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +00001009
Jeremy Hyltone3252ec2002-07-16 21:41:43 +00001010 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001011 """
1012 self._buffer.append(s)
1013
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001014 def _read_readable(self, readable):
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001015 if self.debuglevel > 0:
1016 print("sendIng a read()able")
1017 encode = self._is_textIO(readable)
1018 if encode and self.debuglevel > 0:
1019 print("encoding file using iso-8859-1")
1020 while True:
Nir Sofferad455cd2017-11-06 23:16:37 +02001021 datablock = readable.read(self.blocksize)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001022 if not datablock:
1023 break
1024 if encode:
1025 datablock = datablock.encode("iso-8859-1")
1026 yield datablock
1027
1028 def _send_output(self, message_body=None, encode_chunked=False):
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001029 """Send the currently buffered request and clear the buffer.
1030
Jeremy Hyltone3252ec2002-07-16 21:41:43 +00001031 Appends an extra \\r\\n to the buffer.
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001032 A message_body may be specified, to be appended to the request.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001033 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +00001034 self._buffer.extend((b"", b""))
1035 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001036 del self._buffer[:]
1037 self.send(msg)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001038
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001039 if message_body is not None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001040
1041 # create a consistent interface to message_body
1042 if hasattr(message_body, 'read'):
1043 # Let file-like take precedence over byte-like. This
1044 # is needed to allow the current position of mmap'ed
1045 # files to be taken into account.
1046 chunks = self._read_readable(message_body)
1047 else:
1048 try:
1049 # this is solely to check to see if message_body
1050 # implements the buffer API. it /would/ be easier
1051 # to capture if PyObject_CheckBuffer was exposed
1052 # to Python.
1053 memoryview(message_body)
1054 except TypeError:
1055 try:
1056 chunks = iter(message_body)
1057 except TypeError:
1058 raise TypeError("message_body should be a bytes-like "
1059 "object or an iterable, got %r"
1060 % type(message_body))
1061 else:
1062 # the object implements the buffer interface and
1063 # can be passed directly into socket methods
1064 chunks = (message_body,)
1065
1066 for chunk in chunks:
1067 if not chunk:
1068 if self.debuglevel > 0:
1069 print('Zero length chunk ignored')
1070 continue
1071
1072 if encode_chunked and self._http_vsn == 11:
1073 # chunked encoding
Eric V. Smith451d0e32016-09-09 21:56:20 -04001074 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001075 + b'\r\n'
1076 self.send(chunk)
1077
1078 if encode_chunked and self._http_vsn == 11:
1079 # end chunked transfer
1080 self.send(b'0\r\n\r\n')
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001081
def putrequest(self, method, url, skip_host=False,
               skip_accept_encoding=False):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    Raises CannotSendRequest unless the connection is idle.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state != _CS_IDLE:
        raise CannotSendRequest(self.__state)
    self.__state = _CS_REQ_STARTED

    self._validate_method(method)

    # Save the method for use later in the response phase
    self._method = method

    url = url or '/'
    self._validate_path(url)

    request_line = '%s %s %s' % (method, url, self._http_vsn_str)
    self._output(self._encode_request(request_line))

    if self._http_vsn != 11:
        # For HTTP/1.0, the server will assume "not chunked"
        return

    # Issue some standard headers for better HTTP/1.1 compliance

    if not skip_host:
        # this header is issued *only* for HTTP/1.1
        # connections. more specifically, this means it is
        # only issued when the client uses the new
        # HTTPConnection() class. backwards-compat clients
        # will be using HTTP/1.0 and those clients may be
        # issuing this header themselves. we should NOT issue
        # it twice; some web servers (such as Apache) barf
        # when they see two Host: headers

        # If we need a non-standard port, include it in the
        # header.  If the request is going through a proxy,
        # use the host of the actual URL, not the host of the
        # proxy.
        netloc = urlsplit(url).netloc if url.startswith('http') else ''

        if netloc:
            try:
                netloc_enc = netloc.encode("ascii")
            except UnicodeEncodeError:
                netloc_enc = netloc.encode("idna")
            self.putheader('Host', netloc_enc)
        else:
            if self._tunnel_host:
                host, port = self._tunnel_host, self._tunnel_port
            else:
                host, port = self.host, self.port

            try:
                host_enc = host.encode("ascii")
            except UnicodeEncodeError:
                host_enc = host.encode("idna")

            # An IPv6 address (recognised by its ':') is wrapped
            # with [] when used in the Host header.
            if ':' in host:
                host_enc = b'[' + host_enc + b']'

            if port == self.default_port:
                self.putheader('Host', host_enc)
            else:
                host_enc = host_enc.decode("ascii")
                self.putheader('Host', "%s:%s" % (host_enc, port))

    # note: we are assuming that clients will not attempt to set these
    #       headers since *this* library must deal with the
    #       consequences. this also means that when the supporting
    #       libraries are updated to recognize other forms, then this
    #       code should be changed (removed or updated).

    # we only want a Content-Encoding of "identity" since we don't
    # support encodings such as x-gzip or x-deflate.
    if not skip_accept_encoding:
        self.putheader('Accept-Encoding', 'identity')

    # we can accept "chunked" Transfer-Encodings, but no others
    # NOTE: no TE header implies *only* "chunked"
    #self.putheader('TE', 'chunked')

    # if TE is supplied in the header, then it must appear in a
    # Connection header.
    #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +00001208
Jason R. Coombs7774d782019-09-28 08:32:01 -04001209 def _encode_request(self, request):
1210 # ASCII also helps prevent CVE-2019-9740.
1211 return request.encode('ascii')
1212
def _validate_method(self, method):
    """Validate a method name for putrequest.

    Raises ValueError when the module's disallowed-method-character
    pattern matches anywhere in *method*.
    """
    # prevent http header injection
    offending = _contains_disallowed_method_pchar_re.search(method)
    if not offending:
        return
    raise ValueError(
        f"method can't contain control characters. {method!r} "
        f"(found at least {offending.group()!r})")
1221
def _validate_path(self, url):
    """Validate a url for putrequest.

    Raises InvalidURL when the module's disallowed-URL-character pattern
    matches anywhere in *url*.
    """
    # Prevent CVE-2019-9740.
    offending = _contains_disallowed_url_pchar_re.search(url)
    if not offending:
        return
    raise InvalidURL(f"URL can't contain control characters. {url!r} "
                     f"(found at least {offending.group()!r})")
1229
def _validate_host(self, host):
    """Validate a host so it doesn't contain control characters.

    Raises InvalidURL when the module's disallowed-URL-character pattern
    matches anywhere in *host*.
    """
    # Prevent CVE-2019-18348.
    offending = _contains_disallowed_url_pchar_re.search(host)
    if not offending:
        return
    raise InvalidURL(f"URL can't contain control characters. {host!r} "
                     f"(found at least {offending.group()!r})")
1237
def putheader(self, header, *values):
    """Send a request header line to the server.

    For example: h.putheader('Accept', 'text/html')

    Several *values* are joined into one folded header line.  Raises
    CannotSendHeader unless a request has been started, and ValueError
    for a header name or value rejected by the module's checks.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')

    if not _is_legal_header_name(header):
        raise ValueError('Invalid header name %r' % (header,))

    encoded_values = []
    for raw in values:
        if hasattr(raw, 'encode'):
            item = raw.encode('latin-1')
        elif isinstance(raw, int):
            item = str(raw).encode('ascii')
        else:
            # Anything else is used as given.
            item = raw

        if _is_illegal_header_value(item):
            raise ValueError('Invalid header value %r' % (item,))
        encoded_values.append(item)

    self._output(header + b': ' + b'\r\n\t'.join(encoded_values))
Greg Stein5e0fa402000-06-26 08:28:01 +00001265
def endheaders(self, message_body=None, *, encode_chunked=False):
    """Indicate that the last header line has been sent to the server.

    This method sends the request to the server.  The optional message_body
    argument can be used to pass a message body associated with the
    request.  Raises CannotSendHeader unless a request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001278
def request(self, method, url, body=None, headers={}, *,
            encode_chunked=False):
    """Send a complete request to the server.

    Thin public wrapper: all arguments are forwarded positionally, and
    unchanged, to self._send_request().  Returns None.
    """
    # NOTE: the shared ``{}`` default is kept because it is part of the
    # public signature; _send_request() only iterates over the mapping.
    self._send_request(method, url, body, headers, encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001283
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001284 def _send_request(self, method, url, body, headers, encode_chunked):
Jeremy Hylton636950f2009-03-28 04:34:21 +00001285 # Honor explicitly requested Host: and Accept-Encoding: headers.
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001286 header_names = frozenset(k.lower() for k in headers)
Jeremy Hylton2c178252004-08-07 16:28:14 +00001287 skips = {}
1288 if 'host' in header_names:
1289 skips['skip_host'] = 1
1290 if 'accept-encoding' in header_names:
1291 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +00001292
Jeremy Hylton2c178252004-08-07 16:28:14 +00001293 self.putrequest(method, url, **skips)
1294
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001295 # chunked encoding will happen if HTTP/1.1 is used and either
1296 # the caller passes encode_chunked=True or the following
1297 # conditions hold:
1298 # 1. content-length has not been explicitly set
Martin Panteref91bb22016-08-27 01:39:26 +00001299 # 2. the body is a file or iterable, but not a str or bytes-like
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001300 # 3. Transfer-Encoding has NOT been explicitly set by the caller
1301
R David Murraybeed8402015-03-22 15:18:23 -04001302 if 'content-length' not in header_names:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001303 # only chunk body if not explicitly set for backwards
1304 # compatibility, assuming the client code is already handling the
1305 # chunking
1306 if 'transfer-encoding' not in header_names:
1307 # if content-length cannot be automatically determined, fall
1308 # back to chunked encoding
1309 encode_chunked = False
1310 content_length = self._get_content_length(body, method)
1311 if content_length is None:
Martin Panteref91bb22016-08-27 01:39:26 +00001312 if body is not None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001313 if self.debuglevel > 0:
1314 print('Unable to determine size of %r' % body)
1315 encode_chunked = True
1316 self.putheader('Transfer-Encoding', 'chunked')
1317 else:
1318 self.putheader('Content-Length', str(content_length))
1319 else:
1320 encode_chunked = False
1321
Guido van Rossumcc2b0162007-02-11 06:12:03 +00001322 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +00001323 self.putheader(hdr, value)
Jeremy Hyltonef9f48e2009-03-26 22:04:05 +00001324 if isinstance(body, str):
Jeremy Hylton236654b2009-03-27 20:24:34 +00001325 # RFC 2616 Section 3.7.1 says that text default has a
1326 # default charset of iso-8859-1.
Martin Panter44391482016-02-09 10:20:52 +00001327 body = _encode(body, 'body')
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001328 self.endheaders(body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001329
def getresponse(self):
    """Get the response from the server.

    If the HTTPConnection is in the correct state, returns an
    instance of HTTPResponse or of whatever object is returned by
    the response_class variable.

    If a request has not been sent or if a previous response has
    not been handled, ResponseNotReady is raised.  If the HTTP
    response indicates that the connection should be closed, then
    it will be closed before the response is returned.  When the
    connection is closed, the underlying socket is closed.
    """

    # Forget a prior response once it has been completed.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # A prior response that still exists must be completed first;
    # otherwise this response's header cannot be read to determine the
    # connection-close behavior.
    #
    # Note: if a prior response existed but was connection-close, the
    # socket and response were made independent of this HTTPConnection
    # object, since a new request requires a whole new connection.
    #
    # So the prior response was in one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady(self.__state)

    # The debug level is passed positionally only when debugging is on.
    ctor_args = (self.sock,)
    if self.debuglevel > 0:
        ctor_args += (self.debuglevel,)
    response = self.response_class(*ctor_args, method=self._method)

    try:
        try:
            response.begin()
        except ConnectionError:
            # Close the connection itself, then let the outer handler
            # close the response object as well.
            self.close()
            raise
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if not response.will_close:
            # remember this, so we can tell when it is complete
            self.__response = response
        else:
            # this effectively passes the connection to the response
            self.close()

        return response
    except BaseException:
        # Whatever went wrong, do not leave the response open.
        response.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +00001392
try:
    import ssl
except ImportError:
    # Without the ssl module, HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """This class allows communication via SSL.

        The connection is made by HTTPConnection.connect(); the resulting
        socket is then wrapped with the SSL context chosen in __init__.
        """

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Set up the connection and the SSL context it will use.

            host, port, timeout, source_address and blocksize are passed
            to HTTPConnection.__init__().  Passing key_file, cert_file or
            check_hostname emits a DeprecationWarning.  When context is
            None a default HTTPS context is created.

            Raises ValueError when host name checking is requested but
            the context's verify_mode is ssl.CERT_NONE.
            """
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)

            uses_deprecated = any(
                arg is not None
                for arg in (key_file, cert_file, check_hostname))
            if uses_deprecated:
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)

            self.key_file = key_file
            self.cert_file = cert_file

            if context is None:
                context = ssl._create_default_https_context()
                # send ALPN extension to indicate HTTP/1.1 protocol
                if self._http_vsn == 11:
                    context.set_alpn_protocols(['http/1.1'])
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            will_verify = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                # No explicit request: use the context's own setting.
                check_hostname = context.check_hostname
            if check_hostname and not will_verify:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")

            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port."""
            super().connect()

            # When a tunnel host is set, it is the name given to the
            # SSL layer; otherwise the connection's own host is used.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001458
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +00001463
class NotConnected(HTTPException):
    """HTTPException subclass that adds no behavior of its own."""
    # NOTE(review): raise sites are outside this chunk; presumably signals
    # an operation attempted without an open connection -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001466
class InvalidURL(HTTPException):
    """HTTPException subclass that adds no behavior of its own."""
    # NOTE(review): raise sites are outside this chunk; presumably signals
    # a malformed URL or host/port specification -- confirm.
1469
class UnknownProtocol(HTTPException):
    """HTTPException carrying a protocol version.

    The version is available as ``self.version`` and is also the sole
    element of ``self.args``.
    """

    def __init__(self, version):
        # args is assigned directly instead of calling Exception.__init__.
        self.version = version
        self.args = (version,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001474
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no behavior of its own."""
    # NOTE(review): raise sites are outside this chunk; presumably signals
    # an unsupported Transfer-Encoding -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001477
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no behavior of its own."""
    # NOTE(review): raise sites are outside this chunk; the intended
    # meaning cannot be confirmed from here.
Greg Stein5e0fa402000-06-26 08:28:01 +00001480
class IncompleteRead(HTTPException):
    """HTTPException carrying the data that was read before a failure.

    partial  -- the data obtained so far (its len() is reported)
    expected -- how many more were expected, or None when not given
    """

    def __init__(self, partial, expected=None):
        # args holds only the partial data; expected is an attribute.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return '%s(%i bytes read%s)' % (
            self.__class__.__name__, len(self.partial), suffix)

    # Use object.__str__, which falls back to __repr__, so str() gives
    # the summary above instead of the raw partial data held in args.
    __str__ = object.__str__
Greg Stein5e0fa402000-06-26 08:28:01 +00001494
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady; adds no behavior of its own."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001497
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no behavior of its own."""
    # NOTE(review): raise sites are outside this chunk; presumably raised
    # when a new request is started in the wrong state -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001500
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when the connection is not
    in the request-started state."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001503
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or a prior
    response is still outstanding."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001506
class BadStatusLine(HTTPException):
    """HTTPException carrying a status line.

    The line is stored as ``self.line`` and as the sole element of
    ``self.args``.  A falsy line is replaced by its repr(), so an empty
    string is stored as "''".
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
Greg Stein5e0fa402000-06-26 08:28:01 +00001513
class LineTooLong(HTTPException):
    """HTTPException whose message reports the _MAXLINE limit together
    with line_type, a description of what was being read."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1518
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Exception that is both a ConnectionResetError and a BadStatusLine.

    Callers catching either base class will catch this exception.
    """

    def __init__(self, *pos, **kw):
        # Order matters: BadStatusLine sets ``line`` (and args) for an
        # empty status line first, then ConnectionResetError is
        # initialised with the caller's own arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
1523
# For backwards compatibility: ``error`` is an alias bound to the very
# same class object as HTTPException.
error = HTTPException