blob: 4b1f692844474f8971da779895ccb8461fcb32b3 [file] [log] [blame]
r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
Guido van Rossum23acc951994-02-21 16:36:04 +000070
Barry Warsaw820c1202008-06-12 04:06:45 +000071import email.parser
72import email.message
Serhiy Storchakae4db7692014-12-23 16:28:28 +020073import http
Jeremy Hylton636950f2009-03-28 04:34:21 +000074import io
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +020075import re
Jeremy Hylton636950f2009-03-28 04:34:21 +000076import socket
Saiyang Gou927b8412021-04-23 03:19:08 -070077import sys
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030078import collections.abc
Jeremy Hylton1afc1692008-06-18 20:49:58 +000079from urllib.parse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000080
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Port numbers for HTTP and HTTPS.
HTTP_PORT = 80
HTTPS_PORT = 443

# Initial value of the HTTPResponse status and framing attributes; begin()
# replaces it once the status line and headers have been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (publishes every http.HTTPStatus member, e.g. CONTINUE or NO_CONTENT, as a
# module-level name; begin() relies on these names)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of lines parse_headers() reads before raising HTTPException
_MAXHEADERS = 100
112
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +0200113# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
114#
115# VCHAR = %x21-7E
116# obs-text = %x80-FF
117# header-field = field-name ":" OWS field-value OWS
118# field-name = token
119# field-value = *( field-content / obs-fold )
120# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
121# field-vchar = VCHAR / obs-text
122#
123# obs-fold = CRLF 1*( SP / HTAB )
124# ; obsolete line folding
125# ; see Section 3.2.4
126
127# token = 1*tchar
128#
129# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
130# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
131# / DIGIT / ALPHA
132# ; any VCHAR, except delimiters
133#
134# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
135
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# Both patterns operate on bytes.  The name pattern must match the whole
# value (fullmatch); the value pattern looks anywhere (search) for a bare
# CR or LF that is not followed by folding whitespace.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
158
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000159
def _encode(data, name='data'):
    """Return data encoded as Latin-1, with a friendlier failure message.

    data is the text to encode.  name is how the value is referred to in
    the error message (for example 'body' or 'header').  When data holds
    characters outside Latin-1, a UnicodeEncodeError is raised whose
    reason tells the caller to encode the value as UTF-8 explicitly; the
    original exception context is suppressed.
    """
    try:
        encoded = data.encode("latin-1")
    except UnicodeEncodeError as exc:
        offending = data[exc.start:exc.end]
        reason = ("%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
                  "if you want to send it encoded in UTF-8." %
                  (name.title(), offending, name))
        raise UnicodeEncodeError(exc.encoding, exc.object,
                                 exc.start, exc.end, reason) from None
    return encoded
173
174
class HTTPMessage(email.message.Message):
    """email.message.Message subclass that holds parsed HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Return a list with one 'Name: value' string for every occurrence
        of the header, in message order.  The header name is compared
        case-insensitively; the name in each returned string keeps the
        spelling stored in the message.  A value that was folded over
        several lines stays in one entry, as stored in the message.  If
        the header does not occur, an empty list is returned.

        """
        # Iterate over (name, value) pairs.  Iterating over keys() only
        # yields bare names, which can never equal "name:", so the lookup
        # used to come back empty for every header.
        wanted = name.lower()
        return ['%s: %s' % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000204
def parse_headers(fp, _class=HTTPMessage):
    """Read a header block from fp and return it as a message object.

    fp must be a binary file object.  Lines are read one at a time, as
    bytes, up to and including the first blank line (or EOF), so nothing
    beyond the header block is consumed from the stream.  The collected
    bytes are decoded as iso-8859-1 and handed to the email parser, which
    builds an instance of _class.

    Raises LineTooLong if a line is longer than _MAXLINE, and
    HTTPException if more than _MAXHEADERS lines are read.

    """
    collected = []
    finished = False
    while not finished:
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        collected.append(raw)
        if len(collected) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        # a blank line ends the block; b'' means the stream hit EOF
        finished = raw in (b'\r\n', b'\n', b'')
    text = b''.join(collected).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(text)
Greg Stein5e0fa402000-06-26 08:28:01 +0000227
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000228
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000229class HTTPResponse(io.BufferedIOBase):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000230
231 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
232
Jeremy Hylton811fc142007-08-03 13:30:02 +0000233 # The bytes from the socket object are iso-8859-1 strings.
234 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
235 # text following RFC 2047. The basic status line parsing only
236 # accepts iso-8859-1.
237
Senthil Kumaran052ddb02013-03-18 14:11:41 -0700238 def __init__(self, sock, debuglevel=0, method=None, url=None):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000239 # If the response includes a content-length header, we need to
240 # make sure that the client doesn't read more than the
Jeremy Hylton39b198d2007-08-04 19:22:00 +0000241 # specified number of bytes. If it does, it will block until
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000242 # the server times out and closes the connection. This will
243 # happen if a self.fp.read() is done (without a size) whether
244 # self.fp is buffered or not. So, no self.fp.read() by
245 # clients unless they know what they are doing.
Benjamin Petersonf72d9fb2009-02-08 00:29:20 +0000246 self.fp = sock.makefile("rb")
Jeremy Hylton30f86742000-09-18 22:50:38 +0000247 self.debuglevel = debuglevel
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000248 self._method = method
Greg Stein5e0fa402000-06-26 08:28:01 +0000249
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000250 # The HTTPResponse object is returned via urllib. The clients
251 # of http and urllib expect different attributes for the
252 # headers. headers is used here and supports urllib. msg is
253 # provided as a backwards compatibility layer for http
254 # clients.
255
256 self.headers = self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000257
Greg Steindd6eefb2000-07-18 09:09:48 +0000258 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000259 self.version = _UNKNOWN # HTTP-Version
260 self.status = _UNKNOWN # Status-Code
261 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000262
Tim Peters07e99cb2001-01-14 23:47:14 +0000263 self.chunked = _UNKNOWN # is "chunked" being used?
264 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
265 self.length = _UNKNOWN # number of bytes left in response
266 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000267
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000268 def _read_status(self):
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000269 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
270 if len(line) > _MAXLINE:
271 raise LineTooLong("status line")
Jeremy Hylton30f86742000-09-18 22:50:38 +0000272 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000273 print("reply:", repr(line))
Jeremy Hyltonb6769522003-06-29 17:55:05 +0000274 if not line:
275 # Presumably, the server closed the connection before
276 # sending a valid response.
R David Murraycae7bdb2015-04-05 19:26:29 -0400277 raise RemoteDisconnected("Remote end closed connection without"
278 " response")
Greg Steindd6eefb2000-07-18 09:09:48 +0000279 try:
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000280 version, status, reason = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000281 except ValueError:
282 try:
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000283 version, status = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000284 reason = ""
285 except ValueError:
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000286 # empty version will cause next test to fail.
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000287 version = ""
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000288 if not version.startswith("HTTP/"):
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200289 self._close_conn()
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000290 raise BadStatusLine(line)
Greg Stein5e0fa402000-06-26 08:28:01 +0000291
Jeremy Hylton23d40472001-04-13 14:57:08 +0000292 # The status code is a three-digit number
293 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000294 status = int(status)
Jeremy Hylton23d40472001-04-13 14:57:08 +0000295 if status < 100 or status > 999:
296 raise BadStatusLine(line)
297 except ValueError:
298 raise BadStatusLine(line)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000299 return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000300
Jeremy Hylton39c03802002-07-12 14:04:09 +0000301 def begin(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000302 if self.headers is not None:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000303 # we've already started reading the response
304 return
305
306 # read until we get a non-100 response
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000307 while True:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000308 version, status, reason = self._read_status()
Martin v. Löwis39a31782004-09-18 09:03:49 +0000309 if status != CONTINUE:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000310 break
311 # skip the header from the 100 response
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000312 while True:
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000313 skip = self.fp.readline(_MAXLINE + 1)
314 if len(skip) > _MAXLINE:
315 raise LineTooLong("header line")
316 skip = skip.strip()
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000317 if not skip:
318 break
319 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000320 print("header:", skip)
Tim Petersc411dba2002-07-16 21:35:23 +0000321
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000322 self.code = self.status = status
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000323 self.reason = reason.strip()
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000324 if version in ("HTTP/1.0", "HTTP/0.9"):
325 # Some servers might still return "0.9", treat it as 1.0 anyway
Greg Steindd6eefb2000-07-18 09:09:48 +0000326 self.version = 10
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000327 elif version.startswith("HTTP/1."):
Tim Peters07e99cb2001-01-14 23:47:14 +0000328 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Greg Steindd6eefb2000-07-18 09:09:48 +0000329 else:
330 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000331
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000332 self.headers = self.msg = parse_headers(self.fp)
Barry Warsaw820c1202008-06-12 04:06:45 +0000333
Jeremy Hylton30f86742000-09-18 22:50:38 +0000334 if self.debuglevel > 0:
Matt Houglum461c4162019-04-03 21:36:47 -0700335 for hdr, val in self.headers.items():
336 print("header:", hdr + ":", val)
Greg Stein5e0fa402000-06-26 08:28:01 +0000337
Greg Steindd6eefb2000-07-18 09:09:48 +0000338 # are we using the chunked-style of transfer encoding?
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000339 tr_enc = self.headers.get("transfer-encoding")
Jeremy Hyltond229b3a2002-09-03 19:24:24 +0000340 if tr_enc and tr_enc.lower() == "chunked":
Jeremy Hylton236156f2008-12-15 03:00:50 +0000341 self.chunked = True
Greg Steindd6eefb2000-07-18 09:09:48 +0000342 self.chunk_left = None
343 else:
Jeremy Hylton236156f2008-12-15 03:00:50 +0000344 self.chunked = False
Greg Stein5e0fa402000-06-26 08:28:01 +0000345
Greg Steindd6eefb2000-07-18 09:09:48 +0000346 # will the connection close at the end of the response?
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000347 self.will_close = self._check_close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000348
Greg Steindd6eefb2000-07-18 09:09:48 +0000349 # do we have a Content-Length?
350 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +0000351 self.length = None
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000352 length = self.headers.get("content-length")
Greg Steindd6eefb2000-07-18 09:09:48 +0000353 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000354 try:
355 self.length = int(length)
356 except ValueError:
Christian Heimesa612dc02008-02-24 13:08:18 +0000357 self.length = None
358 else:
359 if self.length < 0: # ignore nonsensical negative lengths
360 self.length = None
361 else:
362 self.length = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000363
Greg Steindd6eefb2000-07-18 09:09:48 +0000364 # does the body have a fixed length? (of zero)
Martin v. Löwis39a31782004-09-18 09:03:49 +0000365 if (status == NO_CONTENT or status == NOT_MODIFIED or
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000366 100 <= status < 200 or # 1xx codes
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000367 self._method == "HEAD"):
Greg Steindd6eefb2000-07-18 09:09:48 +0000368 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000369
Greg Steindd6eefb2000-07-18 09:09:48 +0000370 # if the connection remains open, and we aren't using chunked, and
371 # a content-length was not provided, then assume that the connection
372 # WILL close.
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +0000373 if (not self.will_close and
374 not self.chunked and
375 self.length is None):
Jeremy Hylton236156f2008-12-15 03:00:50 +0000376 self.will_close = True
Greg Stein5e0fa402000-06-26 08:28:01 +0000377
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000378 def _check_close(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000379 conn = self.headers.get("connection")
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000380 if self.version == 11:
381 # An HTTP/1.1 proxy is assumed to stay open unless
382 # explicitly closed.
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000383 if conn and "close" in conn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000384 return True
385 return False
386
Jeremy Hylton2c178252004-08-07 16:28:14 +0000387 # Some HTTP/1.0 implementations have support for persistent
388 # connections, using rules different than HTTP/1.1.
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000389
Christian Heimes895627f2007-12-08 17:28:33 +0000390 # For older HTTP, Keep-Alive indicates persistent connection.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000391 if self.headers.get("keep-alive"):
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000392 return False
Tim Peters77c06fb2002-11-24 02:35:35 +0000393
Jeremy Hylton2c178252004-08-07 16:28:14 +0000394 # At least Akamai returns a "Connection: Keep-Alive" header,
395 # which was supposed to be sent by the client.
396 if conn and "keep-alive" in conn.lower():
397 return False
398
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000399 # Proxy-Connection is a netscape hack.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000400 pconn = self.headers.get("proxy-connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000401 if pconn and "keep-alive" in pconn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000402 return False
403
404 # otherwise, assume it will close
405 return True
406
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200407 def _close_conn(self):
408 fp = self.fp
409 self.fp = None
410 fp.close()
411
Greg Steindd6eefb2000-07-18 09:09:48 +0000412 def close(self):
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300413 try:
414 super().close() # set "closed" flag
415 finally:
416 if self.fp:
417 self._close_conn()
Greg Stein5e0fa402000-06-26 08:28:01 +0000418
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000419 # These implementations are for the benefit of io.BufferedReader.
420
421 # XXX This class should probably be revised to act more like
422 # the "raw stream" that BufferedReader expects.
423
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000424 def flush(self):
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200425 super().flush()
426 if self.fp:
427 self.fp.flush()
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000428
Jeremy Hyltona7cff022009-04-01 02:35:56 +0000429 def readable(self):
Raymond Hettinger15b87bf2015-08-18 22:03:08 -0700430 """Always returns True"""
Jeremy Hyltona7cff022009-04-01 02:35:56 +0000431 return True
432
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000433 # End of "raw stream" methods
434
Greg Steindd6eefb2000-07-18 09:09:48 +0000435 def isclosed(self):
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200436 """True if the connection is closed."""
Greg Steindd6eefb2000-07-18 09:09:48 +0000437 # NOTE: it is possible that we will not ever call self.close(). This
438 # case occurs when will_close is TRUE, length is None, and we
439 # read up to the last byte, but NOT past it.
440 #
441 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
442 # called, meaning self.isclosed() is meaningful.
443 return self.fp is None
444
445 def read(self, amt=None):
446 if self.fp is None:
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000447 return b""
Greg Steindd6eefb2000-07-18 09:09:48 +0000448
Senthil Kumaran71fb6c82010-04-28 17:39:48 +0000449 if self._method == "HEAD":
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200450 self._close_conn()
Senthil Kumaran71fb6c82010-04-28 17:39:48 +0000451 return b""
452
Bruce Merry152f0b82020-06-25 08:30:21 +0200453 if self.chunked:
454 return self._read_chunked(amt)
455
Antoine Pitrou38d96432011-12-06 22:33:57 +0100456 if amt is not None:
Bruce Merry152f0b82020-06-25 08:30:21 +0200457 if self.length is not None and amt > self.length:
458 # clip the read to the "end of response"
459 amt = self.length
460 s = self.fp.read(amt)
461 if not s and amt:
462 # Ideally, we would raise IncompleteRead if the content-length
463 # wasn't satisfied, but it might break compatibility.
464 self._close_conn()
465 elif self.length is not None:
466 self.length -= len(s)
467 if not self.length:
468 self._close_conn()
469 return s
Antoine Pitrou38d96432011-12-06 22:33:57 +0100470 else:
471 # Amount is not given (unbounded read) so we must check self.length
Jeremy Hyltondef9d2a2004-11-07 16:13:49 +0000472 if self.length is None:
Greg Steindd6eefb2000-07-18 09:09:48 +0000473 s = self.fp.read()
474 else:
Antoine Pitroubeec61a2013-02-02 22:49:34 +0100475 try:
476 s = self._safe_read(self.length)
477 except IncompleteRead:
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200478 self._close_conn()
Antoine Pitroubeec61a2013-02-02 22:49:34 +0100479 raise
Jeremy Hyltondef9d2a2004-11-07 16:13:49 +0000480 self.length = 0
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200481 self._close_conn() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000482 return s
483
Antoine Pitrou38d96432011-12-06 22:33:57 +0100484 def readinto(self, b):
Raymond Hettinger15b87bf2015-08-18 22:03:08 -0700485 """Read up to len(b) bytes into bytearray b and return the number
486 of bytes read.
487 """
488
Antoine Pitrou38d96432011-12-06 22:33:57 +0100489 if self.fp is None:
490 return 0
491
492 if self._method == "HEAD":
Serhiy Storchakab6c86fd2013-02-06 10:35:40 +0200493 self._close_conn()
Antoine Pitrou38d96432011-12-06 22:33:57 +0100494 return 0
495
496 if self.chunked:
497 return self._readinto_chunked(b)
498
Greg Steindd6eefb2000-07-18 09:09:48 +0000499 if self.length is not None:
Antoine Pitrou38d96432011-12-06 22:33:57 +0100500 if len(b) > self.length:
Greg Steindd6eefb2000-07-18 09:09:48 +0000501 # clip the read to the "end of response"
Antoine Pitrou38d96432011-12-06 22:33:57 +0100502 b = memoryview(b)[0:self.length]
Greg Steindd6eefb2000-07-18 09:09:48 +0000503
504 # we do not use _safe_read() here because this may be a .will_close
505 # connection, and the user is reading more bytes than will be provided
506 # (for example, reading in 1k chunks)
Antoine Pitrou38d96432011-12-06 22:33:57 +0100507 n = self.fp.readinto(b)
Serhiy Storchaka1c84ac12013-12-17 21:50:02 +0200508 if not n and b:
Antoine Pitroubeec61a2013-02-02 22:49:34 +0100509 # Ideally, we would raise IncompleteRead if the content-length
510 # wasn't satisfied, but it might break compatibility.
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200511 self._close_conn()
Antoine Pitrou6a35e182013-02-02 23:04:56 +0100512 elif self.length is not None:
Antoine Pitrou38d96432011-12-06 22:33:57 +0100513 self.length -= n
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000514 if not self.length:
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200515 self._close_conn()
Antoine Pitrou38d96432011-12-06 22:33:57 +0100516 return n
Greg Steindd6eefb2000-07-18 09:09:48 +0000517
Antoine Pitrou38d96432011-12-06 22:33:57 +0100518 def _read_next_chunk_size(self):
519 # Read the next chunk size from the file
520 line = self.fp.readline(_MAXLINE + 1)
521 if len(line) > _MAXLINE:
522 raise LineTooLong("chunk size")
523 i = line.find(b";")
524 if i >= 0:
525 line = line[:i] # strip chunk-extensions
526 try:
527 return int(line, 16)
528 except ValueError:
529 # close the connection as protocol synchronisation is
530 # probably lost
Serhiy Storchakab6c86fd2013-02-06 10:35:40 +0200531 self._close_conn()
Antoine Pitrou38d96432011-12-06 22:33:57 +0100532 raise
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000533
Antoine Pitrou38d96432011-12-06 22:33:57 +0100534 def _read_and_discard_trailer(self):
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000535 # read and discard trailer up to the CRLF terminator
536 ### note: we shouldn't have any trailers!
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000537 while True:
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000538 line = self.fp.readline(_MAXLINE + 1)
539 if len(line) > _MAXLINE:
540 raise LineTooLong("trailer line")
Christian Heimes0bd4e112008-02-12 22:59:25 +0000541 if not line:
542 # a vanishingly small number of sites EOF without
543 # sending the trailer
544 break
Senthil Kumaran7e70a5c2012-04-29 10:39:49 +0800545 if line in (b'\r\n', b'\n', b''):
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000546 break
547
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000548 def _get_chunk_left(self):
549 # return self.chunk_left, reading a new chunk if necessary.
550 # chunk_left == 0: at the end of the current chunk, need to close it
551 # chunk_left == None: No current chunk, should read next.
552 # This function returns non-zero or None if the last chunk has
553 # been read.
554 chunk_left = self.chunk_left
555 if not chunk_left: # Can be 0 or None
556 if chunk_left is not None:
Mike53f7a7c2017-12-14 14:04:53 +0300557 # We are at the end of chunk, discard chunk end
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000558 self._safe_read(2) # toss the CRLF at the end of the chunk
559 try:
560 chunk_left = self._read_next_chunk_size()
561 except ValueError:
562 raise IncompleteRead(b'')
563 if chunk_left == 0:
564 # last chunk: 1*("0") [ chunk-extension ] CRLF
565 self._read_and_discard_trailer()
566 # we read everything; close the "file"
567 self._close_conn()
568 chunk_left = None
569 self.chunk_left = chunk_left
570 return chunk_left
571
Bruce Merry152f0b82020-06-25 08:30:21 +0200572 def _read_chunked(self, amt=None):
Antoine Pitrou38d96432011-12-06 22:33:57 +0100573 assert self.chunked != _UNKNOWN
Antoine Pitrou38d96432011-12-06 22:33:57 +0100574 value = []
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000575 try:
576 while True:
577 chunk_left = self._get_chunk_left()
578 if chunk_left is None:
579 break
Bruce Merry152f0b82020-06-25 08:30:21 +0200580
581 if amt is not None and amt <= chunk_left:
582 value.append(self._safe_read(amt))
583 self.chunk_left = chunk_left - amt
584 break
585
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000586 value.append(self._safe_read(chunk_left))
Bruce Merry152f0b82020-06-25 08:30:21 +0200587 if amt is not None:
588 amt -= chunk_left
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000589 self.chunk_left = 0
590 return b''.join(value)
591 except IncompleteRead:
592 raise IncompleteRead(b''.join(value))
Tim Peters230a60c2002-11-09 05:08:07 +0000593
Antoine Pitrou38d96432011-12-06 22:33:57 +0100594 def _readinto_chunked(self, b):
595 assert self.chunked != _UNKNOWN
Antoine Pitrou38d96432011-12-06 22:33:57 +0100596 total_bytes = 0
597 mvb = memoryview(b)
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000598 try:
599 while True:
600 chunk_left = self._get_chunk_left()
601 if chunk_left is None:
602 return total_bytes
Antoine Pitrou4ce6aa42011-12-06 22:34:36 +0100603
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000604 if len(mvb) <= chunk_left:
605 n = self._safe_readinto(mvb)
606 self.chunk_left = chunk_left - n
607 return total_bytes + n
608
609 temp_mvb = mvb[:chunk_left]
Antoine Pitrou38d96432011-12-06 22:33:57 +0100610 n = self._safe_readinto(temp_mvb)
611 mvb = mvb[n:]
612 total_bytes += n
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000613 self.chunk_left = 0
Antoine Pitrou38d96432011-12-06 22:33:57 +0100614
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000615 except IncompleteRead:
616 raise IncompleteRead(bytes(b[0:total_bytes]))
Antoine Pitrou38d96432011-12-06 22:33:57 +0100617
Greg Steindd6eefb2000-07-18 09:09:48 +0000618 def _safe_read(self, amt):
Inada Naokid6bf6f22019-04-06 18:06:19 +0900619 """Read the number of bytes requested.
Greg Steindd6eefb2000-07-18 09:09:48 +0000620
621 This function should be used when <amt> bytes "should" be present for
622 reading. If the bytes are truly not available (due to EOF), then the
623 IncompleteRead exception can be used to detect the problem.
624 """
Inada Naokid6bf6f22019-04-06 18:06:19 +0900625 data = self.fp.read(amt)
626 if len(data) < amt:
627 raise IncompleteRead(data, amt-len(data))
628 return data
Greg Steindd6eefb2000-07-18 09:09:48 +0000629
Antoine Pitrou38d96432011-12-06 22:33:57 +0100630 def _safe_readinto(self, b):
631 """Same as _safe_read, but for reading into a buffer."""
Inada Naokid6bf6f22019-04-06 18:06:19 +0900632 amt = len(b)
633 n = self.fp.readinto(b)
634 if n < amt:
635 raise IncompleteRead(bytes(b[:n]), amt-n)
636 return n
Antoine Pitrou38d96432011-12-06 22:33:57 +0100637
def read1(self, n=-1):
    """Read with at most one underlying system call.  If at least one
    byte is buffered, return that instead.

    Returns b"" when there is no underlying stream or the request method
    was HEAD.  Chunked bodies are delegated to _read1_chunked().  When the
    body length is known, *n* is clamped to the bytes still outstanding.

    The connection is released via _close_conn() either when the stream
    hits EOF or as soon as the last outstanding byte of a known-length
    body has been returned, so a caller that only ever uses read1() still
    ends up with a finished response.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        return self._read1_chunked(n)
    if self.length is not None and (n < 0 or n > self.length):
        n = self.length
    result = self.fp.read1(n)
    if not result and n:
        # EOF although data was requested.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(result)
        if not self.length:
            # Whole body delivered: nothing more will ever be read.
            self._close_conn()
    return result
654
def peek(self, n=-1):
    """Return buffered body bytes without consuming them.

    Returns b"" when there is no underlying stream or the request method
    was HEAD; chunked bodies are handled by _peek_chunked().
    """
    # Having this enables IOBase.readline() to read more than one
    # byte at a time
    no_body = self.fp is None or self._method == "HEAD"
    if no_body:
        return b""
    return self._peek_chunked(n) if self.chunked else self.fp.peek(n)
663
def readline(self, limit=-1):
    """Read and return one line of the response body.

    Returns b"" when there is no underlying stream or the request method
    was HEAD.  Chunked bodies use the inherited readline().  When the
    body length is known, *limit* is clamped to the bytes still
    outstanding.

    The connection is released via _close_conn() either when the stream
    hits EOF or as soon as the last outstanding byte of a known-length
    body has been returned.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        # Fallback to IOBase readline which uses peek() and read()
        return super().readline(limit)
    if self.length is not None and (limit < 0 or limit > self.length):
        limit = self.length
    result = self.fp.readline(limit)
    if not result and limit:
        # EOF although data was requested.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(result)
        if not self.length:
            # Whole body delivered: nothing more will ever be read.
            self._close_conn()
    return result
678
679 def _read1_chunked(self, n):
680 # Strictly speaking, _get_chunk_left() may cause more than one read,
681 # but that is ok, since that is to satisfy the chunked protocol.
682 chunk_left = self._get_chunk_left()
683 if chunk_left is None or n == 0:
684 return b''
685 if not (0 <= n <= chunk_left):
686 n = chunk_left # if n is negative or larger than chunk_left
687 read = self.fp.read1(n)
688 self.chunk_left -= len(read)
689 if not read:
690 raise IncompleteRead(b"")
691 return read
692
693 def _peek_chunked(self, n):
694 # Strictly speaking, _get_chunk_left() may cause more than one read,
695 # but that is ok, since that is to satisfy the chunked protocol.
696 try:
697 chunk_left = self._get_chunk_left()
698 except IncompleteRead:
699 return b'' # peek doesn't worry about protocol
700 if chunk_left is None:
701 return b'' # eof
702 # peek is allowed to return more than requested. Just request the
703 # entire chunk, and truncate what we get.
704 return self.fp.peek(chunk_left)[:chunk_left]
705
def fileno(self):
    """Return the file descriptor reported by the underlying ``fp``."""
    stream = self.fp
    return stream.fileno()
708
def getheader(self, name, default=None):
    '''Return the value of the header matching *name*.

    Multiple matching headers are combined into one string, separated
    by a comma and a space.

    When no header matches, *default* is returned (None if it was not
    given).  A non-string iterable *default* is joined the same way as
    header values; any other *default* is returned unchanged.

    If the headers are unknown, raises http.client.ResponseNotReady.

    '''
    if self.headers is None:
        raise ResponseNotReady()
    found = self.headers.get_all(name) or default
    if not isinstance(found, str) and hasattr(found, '__iter__'):
        return ', '.join(found)
    return found
Greg Stein5e0fa402000-06-26 08:28:01 +0000728
def getheaders(self):
    """Return list of (header, value) tuples.

    If the headers are unknown, raises ResponseNotReady.
    """
    message = self.headers
    if message is None:
        raise ResponseNotReady()
    return [*message.items()]
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000734
# We override IOBase.__iter__ so that it doesn't check for closed-ness

def __iter__(self):
    """Return the response object itself as its own iterator."""
    return self
739
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000740 # For compatibility with old-style urllib responses.
741
def info(self):
    '''Return the headers object of this response (``self.headers``).

    This accessor exists for compatibility with old-style urllib
    responses, whose info() returned the meta-information associated
    with a URL.  (Earlier revisions of this docstring named the type
    mimetools.Message; that is a Python 2 module.)

    In that interface the meaning of the headers depends on the scheme:

    When the method is HTTP, these headers are those returned by
    the server at the head of the retrieved HTML page (including
    Content-Length and Content-Type).

    When the method is FTP, a Content-Length header will be
    present if (as is now usual) the server passed back a file
    length in response to the FTP retrieval request. A
    Content-Type header will be present if the MIME type can be
    guessed.

    When the method is local-file, returned headers will include
    a Date representing the file's last-modified time, a
    Content-Length giving file size, and a Content-Type
    containing a guess at the file's type.

    '''
    return self.headers
764
def geturl(self):
    '''Return the real URL of the page (``self.url``).

    In some cases, the HTTP server redirects a client to another
    URL. The urlopen() function handles this transparently, but in
    some cases the caller needs to know which URL the client was
    redirected to. The geturl() method can be used to get at this
    redirected URL.

    '''
    return self.url
776
def getcode(self):
    '''Return the HTTP status code that was sent with the response
    (``self.status``), or None if the URL is not an HTTP URL.

    '''
    return self.status
Greg Stein5e0fa402000-06-26 08:28:01 +0000783
784class HTTPConnection:
785
Greg Steindd6eefb2000-07-18 09:09:48 +0000786 _http_vsn = 11
787 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000788
Greg Steindd6eefb2000-07-18 09:09:48 +0000789 response_class = HTTPResponse
790 default_port = HTTP_PORT
791 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000792 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000793
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000794 @staticmethod
795 def _is_textIO(stream):
796 """Test whether a file-like object is a text or a binary stream.
797 """
798 return isinstance(stream, io.TextIOBase)
799
800 @staticmethod
801 def _get_content_length(body, method):
802 """Get the content-length based on the body.
803
Martin Panteref91bb22016-08-27 01:39:26 +0000804 If the body is None, we set Content-Length: 0 for methods that expect
805 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
806 any method if the body is a str or bytes-like object and not a file.
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000807 """
Martin Panteref91bb22016-08-27 01:39:26 +0000808 if body is None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000809 # do an explicit check for not None here to distinguish
810 # between unset and set but empty
Martin Panteref91bb22016-08-27 01:39:26 +0000811 if method.upper() in _METHODS_EXPECTING_BODY:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000812 return 0
813 else:
814 return None
815
816 if hasattr(body, 'read'):
817 # file-like object.
Martin Panteref91bb22016-08-27 01:39:26 +0000818 return None
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000819
820 try:
821 # does it implement the buffer protocol (bytes, bytearray, array)?
822 mv = memoryview(body)
823 return mv.nbytes
824 except TypeError:
825 pass
826
827 if isinstance(body, str):
828 return len(body)
829
830 return None
831
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None, blocksize=8192):
    """Record the connection parameters; no socket is opened here.

    *host* may carry an embedded ":port" which is split off by
    _get_hostport() when *port* is None.  The resulting host is checked
    by _validate_host().
    """
    # Caller-supplied settings.
    self.timeout = timeout
    self.source_address = source_address
    self.blocksize = blocksize

    # Nothing connected, buffered or in flight yet.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # No CONNECT tunnel until set_tunnel() is called.
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    parsed_host, parsed_port = self._get_hostport(host, port)
    self.host = parsed_host
    self.port = parsed_port

    self._validate_host(self.host)

    # This is stored as an instance variable to allow unit
    # tests to replace it with a suitable mockup
    self._create_connection = socket.create_connection
Greg Stein5e0fa402000-06-26 08:28:01 +0000853
def set_tunnel(self, host, port=None, headers=None):
    """Set up host and port for HTTP CONNECT tunnelling.

    In a connection that uses HTTP CONNECT tunneling, the host passed to the
    constructor is used as a proxy server that relays all communication to
    the endpoint passed to `set_tunnel`. This is done by sending an HTTP
    CONNECT request to the proxy server when the connection is established.

    This method must be called before the HTTP connection has been
    established; otherwise RuntimeError is raised.

    The headers argument should be a mapping of extra HTTP headers to send
    with the CONNECT request.  The mapping is copied, so the caller's
    object is never modified by this connection.
    """

    if self.sock:
        raise RuntimeError("Can't set up tunnel for established connection")

    self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
    if headers:
        # Keep a private copy: the else-branch below clears
        # self._tunnel_headers in place on a later call, which must not
        # empty a dict that still belongs to the caller.
        self._tunnel_headers = dict(headers)
    else:
        self._tunnel_headers.clear()
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000877
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400878 def _get_hostport(self, host, port):
Greg Steindd6eefb2000-07-18 09:09:48 +0000879 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000880 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000881 j = host.rfind(']') # ipv6 addresses have [...]
882 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000883 try:
884 port = int(host[i+1:])
885 except ValueError:
Łukasz Langaa5a9a9c2011-10-18 21:17:39 +0200886 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
887 port = self.default_port
888 else:
889 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000890 host = host[:i]
891 else:
892 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000893 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000894 host = host[1:-1]
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400895
896 return (host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000897
def set_debuglevel(self, level):
    """Store *level* as this connection's ``debuglevel``.

    Other methods of this class print diagnostics when ``debuglevel``
    is greater than 0.
    """
    self.debuglevel = level
900
def _tunnel(self):
    """Send a CONNECT request for the tunnel target and consume the reply.

    The request line and every configured tunnel header are sent with a
    single send() call.  If the status code of the reply is not 200, the
    connection is closed and OSError is raised.  Otherwise the reply's
    header lines are read and discarded up to the blank line (or EOF).

    Raises LineTooLong if a reply header line exceeds _MAXLINE.
    """
    connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
        self._tunnel_host.encode("ascii"), self._tunnel_port)
    headers = [connect]
    for header, value in self._tunnel_headers.items():
        headers.append(f"{header}: {value}\r\n".encode("latin-1"))
    headers.append(b"\r\n")
    # Making a single send() call instead of one per line encourages
    # the host OS to use a more optimal packet size instead of
    # potentially emitting a series of small packets.
    self.send(b"".join(headers))
    del headers

    response = self.response_class(self.sock, method=self._method)
    try:
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n'):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
    finally:
        # The response object only exists to parse the proxy's reply;
        # close it on every path so it is not left for the garbage
        # collector.  NOTE(review): this presumably releases only the
        # response's own file wrapper and leaves self.sock usable --
        # confirm against HTTPResponse.close().
        response.close()
932
def connect(self):
    """Connect to the host and port specified in __init__.

    Emits the "http.client.connect" audit event, creates the socket via
    ``self._create_connection``, tries to enable TCP_NODELAY and, if a
    tunnel host was configured, performs the CONNECT handshake.
    """
    # Imported locally so this method does not rely on a module-level
    # import of errno.
    import errno

    sys.audit("http.client.connect", self, self.host, self.port)
    self.sock = self._create_connection(
        (self.host,self.port), self.timeout, self.source_address)
    # TCP_NODELAY is only an optimisation: a platform that reports the
    # option as unsupported must not make the whole connection fail.
    try:
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    except OSError as e:
        if e.errno != errno.ENOPROTOOPT:
            raise

    if self._tunnel_host:
        self._tunnel()
Greg Stein5e0fa402000-06-26 08:28:01 +0000942
def close(self):
    """Close the connection to the HTTP server.

    Resets the state to idle, closes the socket if there is one and,
    even if that fails, closes any response still attached to this
    connection.
    """
    self.__state = _CS_IDLE
    try:
        conn_sock = self.sock
        if conn_sock:
            self.sock = None
            conn_sock.close()   # close it manually... there may be other refs
    finally:
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000956
def send(self, data):
    """Send ``data`` to the server.

    ``data`` can be a bytes-like object, a file-like object that supports
    a .read() method, or an iterable of bytes-like objects.  File-like
    objects are sent block by block (``self.blocksize`` at a time), with
    text streams encoded as iso-8859-1.

    If no socket is open, connect() is called first when ``auto_open``
    is true; otherwise NotConnected is raised.  An "http.client.send"
    audit event is emitted for the data (or for each block of a file).
    """

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    if self.debuglevel > 0:
        print("send:", repr(data))
    if hasattr(data, "read"):
        # _read_readable() performs the block-wise read, the optional
        # iso-8859-1 encoding and the debug output; reuse it rather than
        # keeping a second copy of that loop here.
        for datablock in self._read_readable(data):
            sys.audit("http.client.send", self, datablock)
            self.sock.sendall(datablock)
        return
    sys.audit("http.client.send", self, data)
    try:
        self.sock.sendall(data)
    except TypeError:
        # Not bytes-like: fall back to sending the items one by one.
        if isinstance(data, collections.abc.Iterable):
            for d in data:
                self.sock.sendall(d)
        else:
            raise TypeError("data should be a bytes-like object "
                            "or an iterable, got %r" % type(data))
Greg Stein5e0fa402000-06-26 08:28:01 +0000996
def _output(self, s):
    """Add a line of output to the current request buffer.

    Assumes that the line does *not* end with \\r\\n.  The line is only
    appended to ``self._buffer``; nothing is sent by this method.
    """
    self._buffer.append(s)
1003
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001004 def _read_readable(self, readable):
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001005 if self.debuglevel > 0:
1006 print("sendIng a read()able")
1007 encode = self._is_textIO(readable)
1008 if encode and self.debuglevel > 0:
1009 print("encoding file using iso-8859-1")
1010 while True:
Nir Sofferad455cd2017-11-06 23:16:37 +02001011 datablock = readable.read(self.blocksize)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001012 if not datablock:
1013 break
1014 if encode:
1015 datablock = datablock.encode("iso-8859-1")
1016 yield datablock
1017
1018 def _send_output(self, message_body=None, encode_chunked=False):
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001019 """Send the currently buffered request and clear the buffer.
1020
Jeremy Hyltone3252ec2002-07-16 21:41:43 +00001021 Appends an extra \\r\\n to the buffer.
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001022 A message_body may be specified, to be appended to the request.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001023 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +00001024 self._buffer.extend((b"", b""))
1025 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001026 del self._buffer[:]
1027 self.send(msg)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001028
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001029 if message_body is not None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001030
1031 # create a consistent interface to message_body
1032 if hasattr(message_body, 'read'):
1033 # Let file-like take precedence over byte-like. This
1034 # is needed to allow the current position of mmap'ed
1035 # files to be taken into account.
1036 chunks = self._read_readable(message_body)
1037 else:
1038 try:
1039 # this is solely to check to see if message_body
1040 # implements the buffer API. it /would/ be easier
1041 # to capture if PyObject_CheckBuffer was exposed
1042 # to Python.
1043 memoryview(message_body)
1044 except TypeError:
1045 try:
1046 chunks = iter(message_body)
1047 except TypeError:
1048 raise TypeError("message_body should be a bytes-like "
1049 "object or an iterable, got %r"
1050 % type(message_body))
1051 else:
1052 # the object implements the buffer interface and
1053 # can be passed directly into socket methods
1054 chunks = (message_body,)
1055
1056 for chunk in chunks:
1057 if not chunk:
1058 if self.debuglevel > 0:
1059 print('Zero length chunk ignored')
1060 continue
1061
1062 if encode_chunked and self._http_vsn == 11:
1063 # chunked encoding
Eric V. Smith451d0e32016-09-09 21:56:20 -04001064 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001065 + b'\r\n'
1066 self.send(chunk)
1067
1068 if encode_chunked and self._http_vsn == 11:
1069 # end chunked transfer
1070 self.send(b'0\r\n\r\n')
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001071
def putrequest(self, method, url, skip_host=False,
               skip_accept_encoding=False):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    The request line (and, for HTTP/1.1, the automatic headers) are only
    placed in the output buffer here.  Raises CannotSendRequest if the
    connection is not idle; `method' and `url' are checked by
    _validate_method() and _validate_path().
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None


    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state == _CS_IDLE:
        self.__state = _CS_REQ_STARTED
    else:
        raise CannotSendRequest(self.__state)

    self._validate_method(method)

    # Save the method for use later in the response phase
    self._method = method

    # An empty url is sent as '/'.
    url = url or '/'
    self._validate_path(url)

    request = '%s %s %s' % (method, url, self._http_vsn_str)

    self._output(self._encode_request(request))

    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # use the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                # absolute URL: take the Host value from the URL itself
                nil, netloc, nil, nil, nil = urlsplit(url)

            if netloc:
                try:
                    netloc_enc = netloc.encode("ascii")
                except UnicodeEncodeError:
                    netloc_enc = netloc.encode("idna")
                self.putheader('Host', netloc_enc)
            else:
                # a tunnel target, when configured, takes precedence
                # over the host this connection was created with
                if self._tunnel_host:
                    host = self._tunnel_host
                    port = self._tunnel_port
                else:
                    host = self.host
                    port = self.port

                try:
                    host_enc = host.encode("ascii")
                except UnicodeEncodeError:
                    host_enc = host.encode("idna")

                # As per RFC 2732, IPv6 address should be wrapped with []
                # when used as Host header

                if host.find(':') >= 0:
                    host_enc = b'[' + host_enc + b']'

                if port == self.default_port:
                    self.putheader('Host', host_enc)
                else:
                    host_enc = host_enc.decode("ascii")
                    self.putheader('Host', "%s:%s" % (host_enc, port))

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001198
Jason R. Coombs7774d782019-09-28 08:32:01 -04001199 def _encode_request(self, request):
1200 # ASCII also helps prevent CVE-2019-9740.
1201 return request.encode('ascii')
1202
def _validate_method(self, method):
    """Validate a method name for putrequest.

    Raises ValueError if *method* matches the disallowed-character
    pattern for methods.
    """
    # prevent http header injection
    offending = _contains_disallowed_method_pchar_re.search(method)
    if offending is None:
        return
    raise ValueError(
            f"method can't contain control characters. {method!r} "
            f"(found at least {offending.group()!r})")
1211
def _validate_path(self, url):
    """Validate a url for putrequest.

    Raises InvalidURL if *url* matches the disallowed-character pattern
    for URLs.
    """
    # Prevent CVE-2019-9740.
    offending = _contains_disallowed_url_pchar_re.search(url)
    if offending is None:
        return
    raise InvalidURL(f"URL can't contain control characters. {url!r} "
                     f"(found at least {offending.group()!r})")
1219
def _validate_host(self, host):
    """Validate a host so it doesn't contain control characters.

    Raises InvalidURL if *host* matches the disallowed-character pattern
    for URLs.
    """
    # Prevent CVE-2019-18348.
    offending = _contains_disallowed_url_pchar_re.search(host)
    if offending is None:
        return
    raise InvalidURL(f"URL can't contain control characters. {host!r} "
                     f"(found at least {offending.group()!r})")
1227
def putheader(self, header, *values):
    """Send a request header line to the server.

    For example: h.putheader('Accept', 'text/html')

    The header name is encoded as ASCII; str values are encoded as
    latin-1 and int values via their decimal text.  Several values are
    joined into one folded header line.  Raises CannotSendHeader unless a
    request has been started, and ValueError for an invalid name or
    value.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')

    if not _is_legal_header_name(header):
        raise ValueError('Invalid header name %r' % (header,))

    encoded = []
    for raw in values:
        if hasattr(raw, 'encode'):
            item = raw.encode('latin-1')
        elif isinstance(raw, int):
            item = str(raw).encode('ascii')
        else:
            item = raw

        if _is_illegal_header_value(item):
            raise ValueError('Invalid header value %r' % (item,))
        encoded.append(item)

    self._output(header + b': ' + b'\r\n\t'.join(encoded))
Greg Stein5e0fa402000-06-26 08:28:01 +00001255
def endheaders(self, message_body=None, *, encode_chunked=False):
    """Indicate that the last header line has been sent to the server.

    This method sends the request to the server.  The optional message_body
    argument can be used to pass a message body associated with the
    request.  Raises CannotSendHeader unless a request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001268
def request(self, method, url, body=None, headers={}, *,
            encode_chunked=False):
    """Send a complete request to the server.

    All arguments are passed unchanged to _send_request().
    """
    self._send_request(method, url, body, headers, encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001273
def _send_request(self, method, url, body, headers, encode_chunked):
    """Emit the request line, the headers and the body of one request.

    method, url     -- passed to putrequest().
    body            -- None, str, or any object accepted by endheaders();
                       a str body is encoded with _encode() first.
    headers         -- mapping of header name to value; each pair is
                       passed to putheader().
    encode_chunked  -- caller's request for chunked encoding; it may be
                       overridden here (see the comments below).
    """
    # Honor explicitly requested Host: and Accept-Encoding: headers.
    header_names = frozenset(k.lower() for k in headers)
    skips = {}
    if 'host' in header_names:
        skips['skip_host'] = 1
    if 'accept-encoding' in header_names:
        skips['skip_accept_encoding'] = 1

    self.putrequest(method, url, **skips)

    # chunked encoding will happen if HTTP/1.1 is used and either
    # the caller passes encode_chunked=True or the following
    # conditions hold:
    # 1. content-length has not been explicitly set
    # 2. the body is a file or iterable, but not a str or bytes-like
    # 3. Transfer-Encoding has NOT been explicitly set by the caller

    if 'content-length' not in header_names:
        # only chunk body if not explicitly set for backwards
        # compatibility, assuming the client code is already handling the
        # chunking
        if 'transfer-encoding' not in header_names:
            # if content-length cannot be automatically determined, fall
            # back to chunked encoding
            encode_chunked = False
            content_length = self._get_content_length(body, method)
            if content_length is None:
                if body is not None:
                    if self.debuglevel > 0:
                        # Wrap body in a 1-tuple: a tuple body (a valid
                        # iterable of chunks) would otherwise be unpacked
                        # by the % operator and raise TypeError.
                        print('Unable to determine size of %r' % (body,))
                    encode_chunked = True
                    self.putheader('Transfer-Encoding', 'chunked')
            else:
                self.putheader('Content-Length', str(content_length))
    else:
        # An explicit Content-Length always disables chunked encoding.
        encode_chunked = False

    for hdr, value in headers.items():
        self.putheader(hdr, value)
    if isinstance(body, str):
        # RFC 2616 Section 3.7.1 says that text default has a
        # default charset of iso-8859-1.
        body = _encode(body, 'body')
    self.endheaders(body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001319
def getresponse(self):
    """Get the response from the server.

    If the HTTPConnection is in the correct state, returns an
    instance of HTTPResponse or of whatever object is returned by
    the response_class variable.

    If a request has not been sent or if a previous response has
    not been handled, ResponseNotReady is raised.  If the HTTP
    response indicates that the connection should be closed, then
    it will be closed before the response is returned.  When the
    connection is closed, the underlying socket is closed.
    """

    # A prior response that has been fully consumed can be dropped.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # A prior response that still exists must be completed first;
    # otherwise this response's header cannot be read to determine the
    # connection-close behavior.
    #
    # note: if a prior response existed, but was connection-close, then
    # the socket and response were made independent of this
    # HTTPConnection object since a new request requires that we open a
    # whole new connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady(self.__state)

    # debuglevel is passed positionally, and only when it is enabled.
    debug_args = (self.debuglevel,) if self.debuglevel > 0 else ()
    response = self.response_class(self.sock, *debug_args,
                                   method=self._method)

    try:
        try:
            response.begin()
        except ConnectionError:
            # Close this connection on a connection error, then re-raise.
            self.close()
            raise
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
    except BaseException:
        # Whatever went wrong, close the response object before
        # propagating the exception.
        response.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +00001382
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """This class allows communication via SSL.

        The TLS settings come from an ssl.SSLContext: either the one
        passed as *context* or a default one created in __init__().
        """

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)
            legacy_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in legacy_args):
                import warnings
                # stacklevel 2 attributes the warning to our caller.
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file

            if context is None:
                context = ssl._create_default_https_context()
                # send ALPN extension to indicate HTTP/1.1 protocol
                if self._http_vsn == 11:
                    context.set_alpn_protocols(['http/1.1'])
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            verifies_certs = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                # Not specified by the caller: follow the context.
                check_hostname = context.check_hostname
            if check_hostname and not verifies_certs:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")

            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            "Connect to a host on a given (SSL) port."

            super().connect()

            # When tunnelling, the TLS peer is the tunnel target.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001448
class HTTPException(Exception):
    """Base class of the HTTP exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +00001453
class NotConnected(HTTPException):
    """HTTPException subclass; adds only a distinct type to catch."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001456
class InvalidURL(HTTPException):
    """HTTPException subclass; adds only a distinct type to catch."""
1459
class UnknownProtocol(HTTPException):
    """HTTPException carrying the offending protocol version.

    The value is available both as ``version`` and as the sole element
    of ``args``.
    """

    def __init__(self, version):
        self.version = version
        # Setting args (instead of calling Exception.__init__) keeps
        # str() working.
        self.args = (version,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001464
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; adds only a distinct type to catch."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001467
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; adds only a distinct type to catch."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001470
class IncompleteRead(HTTPException):
    """HTTPException recording data that was read before a read fell short.

    partial  -- the data obtained so far (its len() appears in the repr).
    expected -- how many more units were expected, or None if unknown.
    """

    def __init__(self, partial, expected=None):
        # Only ``partial`` goes into args; ``expected`` is an attribute.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        cls_name = self.__class__.__name__
        return '%s(%i bytes read%s)' % (cls_name, len(self.partial), suffix)

    # object.__str__ delegates to repr(), so str() shows the summary above.
    __str__ = object.__str__
Greg Stein5e0fa402000-06-26 08:28:01 +00001484
class ImproperConnectionState(HTTPException):
    """Base class for errors raised when a connection is in the wrong state."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001487
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; adds only a distinct type to catch."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001490
class CannotSendHeader(ImproperConnectionState):
    """Raised by endheaders() when no request has been started."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001493
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be read yet.

    That is the case when no request has been sent, or when a previous
    response is still pending.
    """
Greg Stein5e0fa402000-06-26 08:28:01 +00001496
class BadStatusLine(HTTPException):
    """HTTPException carrying the status line that was rejected.

    The line is stored as ``line`` and as the sole element of ``args``.
    A falsy line (for example an empty string) is stored as its repr()
    so that the message is never blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
Greg Stein5e0fa402000-06-26 08:28:01 +00001503
class LineTooLong(HTTPException):
    """HTTPException for a line that exceeded the _MAXLINE limit.

    line_type is a short description of what was being read; it is
    interpolated into the message.
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1508
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Both a ConnectionResetError and a BadStatusLine.

    Handlers for either base class catch it.  The BadStatusLine part is
    always initialised with an empty line; the constructor arguments go
    to ConnectionResetError.
    """

    def __init__(self, *pos, **kw):
        # Initialise the BadStatusLine side first (sets ``line``), then
        # let ConnectionResetError process the caller's arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
1513
# Backwards-compatibility alias: ``error`` is another name for HTTPException.
error = HTTPException