blob: 08cf2ed9b3716b7942dec780c24f0388ea471da1 [file] [log] [blame]
R David Murray44b548d2016-09-08 13:59:53 -04001r"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006HTTPConnection goes through a number of "states", which define when a client
Greg Stein5e0fa402000-06-26 08:28:01 +00007may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
R David Murraycae7bdb2015-04-05 19:26:29 -040023 |\_____________________________
24 | | getresponse() raises
25 | response = getresponse() | ConnectionError
26 v v
27 Unread-response Idle
28 [Response-headers-read]
Greg Stein5e0fa402000-06-26 08:28:01 +000029 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000030 | |
31 | response.read() | putrequest()
32 v v
33 Idle Req-started-unread-response
34 ______/|
35 / |
36 response.read() | | ( putheader() )* endheaders()
37 v v
38 Request-started Req-sent-unread-response
39 |
40 | response.read()
41 v
42 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000043
44This diagram presents the following rules:
45 -- a second request may not be started until {response-headers-read}
46 -- a response [object] cannot be retrieved until {request-sent}
47 -- there is no differentiation between an unread response body and a
48 partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51 HTTPResponse class does not enforce this state machine, which
52 implies sophisticated clients may accelerate the request/response
53 pipeline. Caution should be taken, though: accelerating the states
54 beyond the above pattern may imply knowledge of the server's
55 connection-close behavior for certain requests. For example, it
56 is impossible to tell whether the server will close the connection
57 UNTIL the response headers have been read; this means that further
58 requests cannot be placed into the pipeline until it is known that
59 the server will NOT be closing the connection.
60
61Logical State __state __response
62------------- ------- ----------
63Idle _CS_IDLE None
64Request-started _CS_REQ_STARTED None
65Request-sent _CS_REQ_SENT None
66Unread-response _CS_IDLE <response_class>
67Req-started-unread-response _CS_REQ_STARTED <response_class>
68Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000069"""
Guido van Rossum23acc951994-02-21 16:36:04 +000070
Barry Warsaw820c1202008-06-12 04:06:45 +000071import email.parser
72import email.message
Serhiy Storchakae4db7692014-12-23 16:28:28 +020073import http
Jeremy Hylton636950f2009-03-28 04:34:21 +000074import io
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +020075import re
Jeremy Hylton636950f2009-03-28 04:34:21 +000076import socket
Saiyang Gou927b8412021-04-23 03:19:08 -070077import sys
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030078import collections.abc
Jeremy Hylton1afc1692008-06-18 20:49:58 +000079from urllib.parse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000080
Berker Peksagbabc6882015-02-20 09:39:38 +020081# HTTPMessage, parse_headers(), and the HTTP status code constants are
82# intentionally omitted for simplicity
# Public names exported by ``from http.client import *``.
__all__ = [
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "LineTooLong",
    "RemoteDisconnected",
    "error",
    "responses",
]
Skip Montanaro2dd42762001-01-23 15:35:05 +000090
# Default TCP ports for the two URL schemes.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in response attributes until the real value is parsed.
_UNKNOWN = 'UNKNOWN'

# Connection states (see the state table in the module docstring).
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# Backwards-compatibility hack: expose every HTTPStatus member (OK,
# NOT_FOUND, ...) as a module-level name.
globals().update(http.HTTPStatus.__members__)

# Second backwards-compatibility hack: map each status code to its
# official reason phrase.
responses = {
    status: status.phrase
    for status in http.HTTPStatus.__members__.values()
}

# Upper bound on the length of a single line obtained via readline().
_MAXLINE = 65536
# Upper bound on the number of header lines accepted in one message.
_MAXHEADERS = 100
112
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +0200113# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
114#
115# VCHAR = %x21-7E
116# obs-text = %x80-FF
117# header-field = field-name ":" OWS field-value OWS
118# field-name = token
119# field-value = *( field-content / obs-fold )
120# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
121# field-vchar = VCHAR / obs-text
122#
123# obs-fold = CRLF 1*( SP / HTAB )
124# ; obsolete line folding
125# ; see Section 3.2.4
126
127# token = 1*tchar
128#
129# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
130# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
131# / DIGIT / ALPHA
132# ; any VCHAR, except delimiters
133#
134# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
135
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700136# the patterns for both name and value are more lenient than RFC
Serhiy Storchakaa112a8a2015-03-12 11:13:36 +0200137# definitions to allow for backwards compatibility
138_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
139_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
140
Gregory P. Smithc4e671e2019-04-30 19:12:21 -0700141# These characters are not allowed within HTTP URL paths.
142# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
143# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
144# Prevents CVE-2019-9740. Includes control characters such as \r\n.
145# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
146_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
147# Arguably only these _should_ allowed:
148# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
149# We are more lenient for assumed real world compatibility purposes.
150
AMIR8ca8a2e2020-07-19 00:46:10 +0430151# These characters are not allowed within HTTP method names
152# to prevent http header injection.
153_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
154
R David Murraybeed8402015-03-22 15:18:23 -0400155# We always set the Content-Length header for these methods because some
156# servers will otherwise respond with a 411
157_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
158
Senthil Kumaran5466bf12010-12-18 16:55:23 +0000159
Martin Panter44391482016-02-09 10:20:52 +0000160def _encode(data, name='data'):
161 """Call data.encode("latin-1") but show a better error message."""
162 try:
163 return data.encode("latin-1")
164 except UnicodeEncodeError as err:
165 raise UnicodeEncodeError(
166 err.encoding,
167 err.object,
168 err.start,
169 err.end,
170 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
171 "if you want to send it encoded in UTF-8." %
172 (name.title(), data[err.start:err.end], name)) from None
173
174
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Return a list of "Name: value" strings, one per occurrence of the
        header *name*, in the order they appear in the message.  If the
        header does not occur, an empty list is returned.  Case is not
        important in the header name; the returned strings keep the
        spelling stored in the message.

        The previous implementation iterated over self.keys(), which
        yields bare header names (never "name:" prefixed lines), so the
        comparison could not succeed and the result was always empty.
        """
        wanted = name.lower()
        # NOTE(review): any continuation lines are presumably already part
        # of the stored value, so they are returned embedded in the same
        # string rather than as separate list items -- confirm against the
        # email parser policy in use.
        return ['%s: %s' % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000204
def _read_headers(fp):
    """Collect raw header lines from the binary file object *fp*.

    Lines are read until a blank line (CRLF or LF) or end of file is
    seen; that terminating line is included in the returned list.

    Raises LineTooLong if a single line exceeds _MAXLINE bytes, and
    HTTPException if more than _MAXHEADERS lines are read.
    """
    end_markers = (b'\r\n', b'\n', b'')
    collected = []
    while True:
        # Ask for one byte more than allowed so an over-long line is
        # detectable without reading it completely.
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        collected.append(raw)
        if len(collected) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in end_markers:
            return collected
222
def parse_headers(fp, _class=HTTPMessage):
    """Parse only the RFC2822-style header section from a file pointer.

    The email parser works on str, not bytes.  Wrapping *fp* in a
    TextIOWrapper would buffer bytes beyond the headers that are later
    needed as raw bytes, so exactly the header lines are read here as
    bytes, decoded as ISO-8859-1, and handed to the email parser.

    Returns a message object of type *_class*.
    """
    raw_lines = _read_headers(fp)
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)
Greg Stein5e0fa402000-06-26 08:28:01 +0000236
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000237
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000238class HTTPResponse(io.BufferedIOBase):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000239
240 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
241
Jeremy Hylton811fc142007-08-03 13:30:02 +0000242 # The bytes from the socket object are iso-8859-1 strings.
243 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
244 # text following RFC 2047. The basic status line parsing only
245 # accepts iso-8859-1.
246
def __init__(self, sock, debuglevel=0, method=None, url=None):
    """Wrap *sock* in a binary file object and reset all response state.

    *debuglevel* > 0 enables debug printing, *method* is the request
    method this response answers.  *url* is accepted but not used here.
    """
    # If the response includes a content-length header, we need to
    # make sure that the client doesn't read more than the specified
    # number of bytes.  If it does, it will block until the server
    # times out and closes the connection.  This will happen if a
    # self.fp.read() is done (without a size) whether self.fp is
    # buffered or not.  So, no self.fp.read() by clients unless they
    # know what they are doing.
    self.fp = sock.makefile("rb")
    self.debuglevel = debuglevel
    self._method = method

    # The HTTPResponse object is returned via urllib.  The clients of
    # http and urllib expect different attributes for the headers:
    # "headers" supports urllib, "msg" is the backwards-compatible name
    # for http clients.  Both stay None until the headers are parsed.
    self.msg = None
    self.headers = None

    # Taken from the Status-Line of the response.
    self.version = _UNKNOWN     # HTTP-Version
    self.status = _UNKNOWN      # Status-Code
    self.reason = _UNKNOWN      # Reason-Phrase

    # Body-framing information.
    self.chunked = _UNKNOWN     # is "chunked" being used?
    self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
    self.length = _UNKNOWN      # number of bytes left in response
    self.will_close = _UNKNOWN  # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000276
def _read_status(self):
    """Read and parse the status line; return (version, status, reason).

    *status* is returned as an int, *version* and *reason* as str.

    Raises LineTooLong for an over-long line, RemoteDisconnected when
    nothing at all was received, and BadStatusLine when the line does
    not look like "HTTP/x.y NNN [reason]".
    """
    raw = self.fp.readline(_MAXLINE + 1)
    line = str(raw, "iso-8859-1")
    if len(line) > _MAXLINE:
        raise LineTooLong("status line")
    if self.debuglevel > 0:
        print("reply:", repr(line))
    if not line:
        # Presumably, the server closed the connection before
        # sending a valid response.
        raise RemoteDisconnected("Remote end closed connection without"
                                 " response")

    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        version, status = fields
        reason = ""
    else:
        # An empty version makes the check below fail.
        version = ""
    if not version.startswith("HTTP/"):
        self._close_conn()
        raise BadStatusLine(line)

    # The status code is a three-digit number.
    try:
        status = int(status)
        if not 100 <= status <= 999:
            raise BadStatusLine(line)
    except ValueError:
        raise BadStatusLine(line)
    return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000309
def begin(self):
    """Read the status line and headers and derive the body framing.

    Does nothing if the headers were already read.  Afterwards status,
    reason, version, headers/msg, chunked, will_close and length are set.

    Raises UnknownProtocol for a version other than HTTP/0.9 or
    HTTP/1.x, plus whatever _read_status() and parse_headers() raise.
    """
    if self.headers is not None:
        # we've already started reading the response
        return

    # Skip over any number of "100 Continue" interim responses.
    while True:
        version, status, reason = self._read_status()
        if status != CONTINUE:
            break
        # Discard the header section that belongs to the 100 response.
        interim_headers = _read_headers(self.fp)
        if self.debuglevel > 0:
            print("headers:", interim_headers)
        del interim_headers

    self.code = self.status = status
    self.reason = reason.strip()
    if version in ("HTTP/1.0", "HTTP/0.9"):
        # Some servers might still return "0.9", treat it as 1.0 anyway
        self.version = 10
    elif version.startswith("HTTP/1."):
        # use HTTP/1.1 code for HTTP/1.x where x>=1
        self.version = 11
    else:
        raise UnknownProtocol(version)

    self.headers = self.msg = parse_headers(self.fp)

    if self.debuglevel > 0:
        for hdr, val in self.headers.items():
            print("header:", hdr + ":", val)

    # Is the chunked transfer encoding in use?
    transfer_encoding = self.headers.get("transfer-encoding")
    self.chunked = bool(transfer_encoding
                        and transfer_encoding.lower() == "chunked")
    if self.chunked:
        self.chunk_left = None

    # Will the connection close at the end of the response?
    self.will_close = self._check_close()

    # Do we have a usable Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    self.length = None
    declared = self.headers.get("content-length")
    if declared and not self.chunked:
        try:
            parsed = int(declared)
        except ValueError:
            parsed = None
        else:
            if parsed < 0:
                # ignore nonsensical negative lengths
                parsed = None
        self.length = parsed

    # Responses that never carry a body have a fixed length of zero.
    if (status in (NO_CONTENT, NOT_MODIFIED) or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
        self.length = 0

    # If the connection would stay open but the body is neither chunked
    # nor length-delimited, the only possible end marker is the server
    # closing the connection, so assume that it WILL close.
    if not (self.will_close or self.chunked) and self.length is None:
        self.will_close = True
Greg Stein5e0fa402000-06-26 08:28:01 +0000381
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000382 def _check_close(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000383 conn = self.headers.get("connection")
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000384 if self.version == 11:
385 # An HTTP/1.1 proxy is assumed to stay open unless
386 # explicitly closed.
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000387 if conn and "close" in conn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000388 return True
389 return False
390
Jeremy Hylton2c178252004-08-07 16:28:14 +0000391 # Some HTTP/1.0 implementations have support for persistent
392 # connections, using rules different than HTTP/1.1.
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000393
Christian Heimes895627f2007-12-08 17:28:33 +0000394 # For older HTTP, Keep-Alive indicates persistent connection.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000395 if self.headers.get("keep-alive"):
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000396 return False
Tim Peters77c06fb2002-11-24 02:35:35 +0000397
Jeremy Hylton2c178252004-08-07 16:28:14 +0000398 # At least Akamai returns a "Connection: Keep-Alive" header,
399 # which was supposed to be sent by the client.
400 if conn and "keep-alive" in conn.lower():
401 return False
402
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000403 # Proxy-Connection is a netscape hack.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000404 pconn = self.headers.get("proxy-connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000405 if pconn and "keep-alive" in pconn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000406 return False
407
408 # otherwise, assume it will close
409 return True
410
Serhiy Storchakab5b9c8c2013-02-06 10:31:57 +0200411 def _close_conn(self):
412 fp = self.fp
413 self.fp = None
414 fp.close()
415
def close(self):
    """Mark the response closed and release the underlying file object."""
    try:
        # The base class sets the "closed" flag.
        super().close()
    finally:
        # Release the file object even if the base-class close failed.
        if self.fp:
            self._close_conn()
Greg Stein5e0fa402000-06-26 08:28:01 +0000422
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000423 # These implementations are for the benefit of io.BufferedReader.
424
425 # XXX This class should probably be revised to act more like
426 # the "raw stream" that BufferedReader expects.
427
def flush(self):
    """Flush this object and, if still attached, the underlying file."""
    super().flush()
    if self.fp:
        self.fp.flush()
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000432
def readable(self):
    """Report that the response supports reading; always True."""
    return True
436
Jeremy Hyltondf5f6b52007-08-08 17:36:33 +0000437 # End of "raw stream" methods
438
def isclosed(self):
    """True if the connection is closed."""
    # NOTE: it is possible that self.close() is never called.  That
    #       happens when will_close is TRUE, length is None, and the
    #       caller reads up to the last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    closed = self.fp is None
    return closed
448
def read(self, amt=None):
    """Read and return the response body, or up to the next *amt* bytes.

    With *amt* None or negative, the rest of the body is read: up to the
    remaining Content-Length if one is known, otherwise until end of
    file.  With a non-negative *amt*, at most *amt* bytes are returned
    and the read never goes past the end of the response.

    Returns b"" once the response is closed and for HEAD requests.
    Raises IncompleteRead if a body of known length ends early during
    an unbounded read.
    """
    if self.fp is None:
        return b""

    if self._method == "HEAD":
        self._close_conn()
        return b""

    # A negative size means "read everything", as for other binary
    # streams.  Previously it was treated as a bounded read: fp.read(-1)
    # then read past Content-Length, and the chunked reader received a
    # negative byte count.
    if amt is not None and amt < 0:
        amt = None

    if self.chunked:
        return self._read_chunked(amt)

    if amt is not None:
        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length
        s = self.fp.read(amt)
        if not s and amt:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= len(s)
            if not self.length:
                self._close_conn()
        return s

    # Amount is not given (unbounded read) so we must check self.length
    if self.length is None:
        s = self.fp.read()
    else:
        try:
            s = self._safe_read(self.length)
        except IncompleteRead:
            self._close_conn()
            raise
        self.length = 0
    self._close_conn()        # we read everything
    return s
487
def readinto(self, b):
    """Read up to len(b) bytes into bytearray b and return the number
    of bytes read.

    Returns 0 once the response is closed and for HEAD requests.
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    if self.length is not None and len(b) > self.length:
        # clip the read to the "end of response"
        b = memoryview(b)[0:self.length]

    # we do not use _safe_read() here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks)
    count = self.fp.readinto(b)
    if not count and b:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
Greg Steindd6eefb2000-07-18 09:09:48 +0000521
def _read_next_chunk_size(self):
    """Read a chunk-size line and return the size as an int.

    Anything after a ";" (chunk extensions) is ignored.  Raises
    LineTooLong for an over-long line; on an unparsable size the
    connection is closed and the ValueError is re-raised.
    """
    raw = self.fp.readline(_MAXLINE + 1)
    if len(raw) > _MAXLINE:
        raise LineTooLong("chunk size")
    # Keep only what precedes the first ";" (strip chunk-extensions).
    size_field = raw.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # close the connection as protocol synchronisation is
        # probably lost
        self._close_conn()
        raise
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000537
def _read_and_discard_trailer(self):
    """Consume trailer lines up to and including the blank terminator.

    Raises LineTooLong if a trailer line exceeds _MAXLINE bytes.
    """
    ### note: we shouldn't have any trailers!
    end_markers = (b'\r\n', b'\n', b'')
    while True:
        raw = self.fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("trailer line")
        # An empty read covers the vanishingly small number of sites
        # that EOF without sending the trailer.
        if not raw or raw in end_markers:
            return
551
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000552 def _get_chunk_left(self):
553 # return self.chunk_left, reading a new chunk if necessary.
554 # chunk_left == 0: at the end of the current chunk, need to close it
555 # chunk_left == None: No current chunk, should read next.
556 # This function returns non-zero or None if the last chunk has
557 # been read.
558 chunk_left = self.chunk_left
559 if not chunk_left: # Can be 0 or None
560 if chunk_left is not None:
Mike53f7a7c2017-12-14 14:04:53 +0300561 # We are at the end of chunk, discard chunk end
Kristján Valur Jónsson8e5d0ca2014-03-19 10:07:26 +0000562 self._safe_read(2) # toss the CRLF at the end of the chunk
563 try:
564 chunk_left = self._read_next_chunk_size()
565 except ValueError:
566 raise IncompleteRead(b'')
567 if chunk_left == 0:
568 # last chunk: 1*("0") [ chunk-extension ] CRLF
569 self._read_and_discard_trailer()
570 # we read everything; close the "file"
571 self._close_conn()
572 chunk_left = None
573 self.chunk_left = chunk_left
574 return chunk_left
575
def _read_chunked(self, amt=None):
    """Read from a chunked body: everything if *amt* is None, otherwise
    up to *amt* bytes, and return the data as a single bytes object.

    Raises IncompleteRead, carrying the data gathered from complete
    reads so far, if the stream ends early.
    """
    assert self.chunked != _UNKNOWN
    pieces = []
    try:
        while True:
            available = self._get_chunk_left()
            if available is None:
                # Last chunk already consumed.
                break

            if amt is not None and amt <= available:
                # The request is satisfied inside the current chunk.
                pieces.append(self._safe_read(amt))
                self.chunk_left = available - amt
                break

            # Take the rest of this chunk and move on to the next one.
            pieces.append(self._safe_read(available))
            if amt is not None:
                amt -= available
            self.chunk_left = 0
        return b''.join(pieces)
    except IncompleteRead:
        raise IncompleteRead(b''.join(pieces))
Tim Peters230a60c2002-11-09 05:08:07 +0000597
def _readinto_chunked(self, b):
    """Fill buffer *b* from a chunked body; return the bytes stored.

    Raises IncompleteRead, carrying the bytes stored so far, if the
    stream ends early.
    """
    assert self.chunked != _UNKNOWN
    stored = 0
    view = memoryview(b)
    try:
        while True:
            available = self._get_chunk_left()
            if available is None:
                return stored

            if len(view) <= available:
                # The rest of the buffer fits inside the current chunk.
                count = self._safe_readinto(view)
                self.chunk_left = available - count
                return stored + count

            # Drain the current chunk into the front of the buffer,
            # then continue with the next chunk.
            count = self._safe_readinto(view[:available])
            view = view[count:]
            stored += count
            self.chunk_left = 0

    except IncompleteRead:
        raise IncompleteRead(bytes(b[0:stored]))
Antoine Pitrou38d96432011-12-06 22:33:57 +0100621
Greg Steindd6eefb2000-07-18 09:09:48 +0000622 def _safe_read(self, amt):
Inada Naokid6bf6f22019-04-06 18:06:19 +0900623 """Read the number of bytes requested.
Greg Steindd6eefb2000-07-18 09:09:48 +0000624
625 This function should be used when <amt> bytes "should" be present for
626 reading. If the bytes are truly not available (due to EOF), then the
627 IncompleteRead exception can be used to detect the problem.
628 """
Inada Naokid6bf6f22019-04-06 18:06:19 +0900629 data = self.fp.read(amt)
630 if len(data) < amt:
631 raise IncompleteRead(data, amt-len(data))
632 return data
Greg Steindd6eefb2000-07-18 09:09:48 +0000633
Antoine Pitrou38d96432011-12-06 22:33:57 +0100634 def _safe_readinto(self, b):
635 """Same as _safe_read, but for reading into a buffer."""
Inada Naokid6bf6f22019-04-06 18:06:19 +0900636 amt = len(b)
637 n = self.fp.readinto(b)
638 if n < amt:
639 raise IncompleteRead(bytes(b[:n]), amt-n)
640 return n
Antoine Pitrou38d96432011-12-06 22:33:57 +0100641
def read1(self, n=-1):
    """Read with at most one underlying system call.

    If at least one byte is already buffered, that data is returned
    instead.  Returns b"" when the response has no file object or
    answers a HEAD request.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        return self._read1_chunked(n)

    # Never ask for more than the body still has left.
    limit = self.length
    if limit is not None and not (0 <= n <= limit):
        n = limit

    data = self.fp.read1(n)
    if n and not data:
        # Data was requested but nothing arrived: the stream is done.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(data)
    return data
658
def peek(self, n=-1):
    """Return buffered body bytes without consuming them.

    Providing peek() lets IOBase.readline() read more than one byte at
    a time.  Returns b"" when there is no file object or the response
    answers a HEAD request.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    peeker = self._peek_chunked if self.chunked else self.fp.peek
    return peeker(n)
667
def readline(self, limit=-1):
    """Read one line from the response body.

    Returns b"" when there is no file object or the response answers a
    HEAD request.  For a body of known length, no more than the
    remaining length is read.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        # Fallback to IOBase readline which uses peek() and read()
        return super().readline(limit)

    remaining = self.length
    if remaining is not None and not (0 <= limit <= remaining):
        limit = remaining

    line = self.fp.readline(limit)
    if limit and not line:
        # Data was requested but nothing arrived: the stream is done.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(line)
    return line
682
683 def _read1_chunked(self, n):
684 # Strictly speaking, _get_chunk_left() may cause more than one read,
685 # but that is ok, since that is to satisfy the chunked protocol.
686 chunk_left = self._get_chunk_left()
687 if chunk_left is None or n == 0:
688 return b''
689 if not (0 <= n <= chunk_left):
690 n = chunk_left # if n is negative or larger than chunk_left
691 read = self.fp.read1(n)
692 self.chunk_left -= len(read)
693 if not read:
694 raise IncompleteRead(b"")
695 return read
696
697 def _peek_chunked(self, n):
698 # Strictly speaking, _get_chunk_left() may cause more than one read,
699 # but that is ok, since that is to satisfy the chunked protocol.
700 try:
701 chunk_left = self._get_chunk_left()
702 except IncompleteRead:
703 return b'' # peek doesn't worry about protocol
704 if chunk_left is None:
705 return b'' # eof
706 # peek is allowed to return more than requested. Just request the
707 # entire chunk, and truncate what we get.
708 return self.fp.peek(chunk_left)[:chunk_left]
709
def fileno(self):
    """Return the file descriptor reported by the underlying file object."""
    underlying = self.fp
    return underlying.fileno()
712
def getheader(self, name, default=None):
    """Return the value of the header matching *name*.

    Multiple matching headers are merged into one string, joined by
    a comma and a space.  When nothing matches, *default* is used
    (None unless given); an iterable, non-string default is joined
    the same way.

    Raises http.client.ResponseNotReady if the headers are unknown.
    """
    if self.headers is None:
        raise ResponseNotReady()
    found = self.headers.get_all(name) or default
    if not isinstance(found, str) and hasattr(found, '__iter__'):
        return ', '.join(found)
    # A plain string or a non-iterable value is returned untouched.
    return found
Greg Stein5e0fa402000-06-26 08:28:01 +0000732
def getheaders(self):
    """Return the response headers as a list of (header, value) tuples.

    Raises ResponseNotReady if the headers are unknown.
    """
    parsed = self.headers
    if parsed is None:
        raise ResponseNotReady()
    return list(parsed.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000738
Antoine Pitroub353c122009-02-11 00:39:14 +0000739 # We override IOBase.__iter__ so that it doesn't check for closed-ness
740
741 def __iter__(self):
742 return self
743
# For compatibility with old-style urllib responses.

def info(self):
    """Return the headers object of this response.

    This accessor exists so that the response offers the same
    ``info()`` method as the objects returned by urllib.  It simply
    hands back ``self.headers``: the meta-information the server sent
    at the head of the response (for example Content-Length and
    Content-Type).
    """
    return self.headers
768
def geturl(self):
    """Return the real URL of the page.

    A server may redirect the client to a different URL.  urlopen()
    follows such redirects transparently, so this accessor lets the
    caller find out which URL was finally retrieved.
    """
    final_url = self.url
    return final_url
780
def getcode(self):
    """Return the HTTP status code that was sent with the response.

    The value is None if the URL is not an HTTP URL.
    """
    status_code = self.status
    return status_code
Greg Stein5e0fa402000-06-26 08:28:01 +0000787
788class HTTPConnection:
789
Greg Steindd6eefb2000-07-18 09:09:48 +0000790 _http_vsn = 11
791 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000792
Greg Steindd6eefb2000-07-18 09:09:48 +0000793 response_class = HTTPResponse
794 default_port = HTTP_PORT
795 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000796 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000797
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000798 @staticmethod
799 def _is_textIO(stream):
800 """Test whether a file-like object is a text or a binary stream.
801 """
802 return isinstance(stream, io.TextIOBase)
803
804 @staticmethod
805 def _get_content_length(body, method):
806 """Get the content-length based on the body.
807
Martin Panteref91bb22016-08-27 01:39:26 +0000808 If the body is None, we set Content-Length: 0 for methods that expect
809 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
810 any method if the body is a str or bytes-like object and not a file.
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000811 """
Martin Panteref91bb22016-08-27 01:39:26 +0000812 if body is None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000813 # do an explicit check for not None here to distinguish
814 # between unset and set but empty
Martin Panteref91bb22016-08-27 01:39:26 +0000815 if method.upper() in _METHODS_EXPECTING_BODY:
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000816 return 0
817 else:
818 return None
819
820 if hasattr(body, 'read'):
821 # file-like object.
Martin Panteref91bb22016-08-27 01:39:26 +0000822 return None
Martin Panter3c0d0ba2016-08-24 06:33:33 +0000823
824 try:
825 # does it implement the buffer protocol (bytes, bytearray, array)?
826 mv = memoryview(body)
827 return mv.nbytes
828 except TypeError:
829 pass
830
831 if isinstance(body, str):
832 return len(body)
833
834 return None
835
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None, blocksize=8192):
    """Record the connection parameters; no socket is opened here.

    *host* may carry a ":port" suffix, which is used when *port* is
    None.  InvalidURL is raised by the helpers for a non-numeric port
    or a host containing disallowed characters.
    """
    # Caller-supplied settings.
    self.timeout = timeout
    self.source_address = source_address
    self.blocksize = blocksize

    # Connection and request state.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # Nothing is tunnelled until set_tunnel() is called.
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    self.host, self.port = self._get_hostport(host, port)
    self._validate_host(self.host)

    # Kept on the instance so that unit tests can swap in a suitable
    # mockup.
    self._create_connection = socket.create_connection
Greg Stein5e0fa402000-06-26 08:28:01 +0000857
def set_tunnel(self, host, port=None, headers=None):
    """Set up host and port for HTTP CONNECT tunnelling.

    In a connection that uses HTTP CONNECT tunneling, the host passed to the
    constructor is used as a proxy server that relays all communication to
    the endpoint passed to `set_tunnel`. This is done by sending an HTTP
    CONNECT request to the proxy server when the connection is established.

    This method must be called before the HTTP connection has been
    established; RuntimeError is raised otherwise.

    The headers argument should be a mapping of extra HTTP headers to send
    with the CONNECT request.  The mapping is copied, so later changes made
    by the caller (or by this object) do not affect each other.
    """

    if self.sock:
        raise RuntimeError("Can't set up tunnel for established connection")

    self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
    # Always keep a private dict: storing the caller's mapping and later
    # clearing it in place would silently empty the caller's object.
    self._tunnel_headers = dict(headers) if headers else {}
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000881
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400882 def _get_hostport(self, host, port):
Greg Steindd6eefb2000-07-18 09:09:48 +0000883 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000884 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000885 j = host.rfind(']') # ipv6 addresses have [...]
886 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000887 try:
888 port = int(host[i+1:])
889 except ValueError:
Łukasz Langaa5a9a9c2011-10-18 21:17:39 +0200890 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
891 port = self.default_port
892 else:
893 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000894 host = host[:i]
895 else:
896 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000897 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000898 host = host[1:-1]
Senthil Kumaran9da047b2014-04-14 13:07:56 -0400899
900 return (host, port)
Greg Stein5e0fa402000-06-26 08:28:01 +0000901
def set_debuglevel(self, level):
    """Store *level* as the debug level; values above 0 enable debug prints."""
    setattr(self, 'debuglevel', level)
904
def _tunnel(self):
    """Send the CONNECT request to the proxy and consume its reply.

    The request line and the extra tunnel headers go out in a single
    send.  The status line of the reply is then read; any status other
    than 200 closes the connection and raises OSError.  The remaining
    reply headers are read and discarded.  LineTooLong is raised if a
    header line exceeds _MAXLINE.
    """
    connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
        self._tunnel_host.encode("ascii"), self._tunnel_port)
    headers = [connect]
    for header, value in self._tunnel_headers.items():
        headers.append(f"{header}: {value}\r\n".encode("latin-1"))
    headers.append(b"\r\n")
    # Making a single send() call instead of one per line encourages
    # the host OS to use a more optimal packet size instead of
    # potentially emitting a series of small packets.
    self.send(b"".join(headers))
    del headers

    response = self.response_class(self.sock, method=self._method)
    try:
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            # Stop at the blank line ending the headers; b'' covers
            # sites which EOF without sending a trailer.
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
    finally:
        # The response object only served to parse the proxy's reply;
        # release it (and the file object it holds) on every path.
        response.close()
936
def connect(self):
    """Connect to the host and port specified in __init__.

    An "http.client.connect" audit event is raised first.  The new
    socket gets TCP_NODELAY set, and the CONNECT handshake is run when
    a tunnel has been configured.
    """
    sys.audit("http.client.connect", self, self.host, self.port)
    address = (self.host, self.port)
    self.sock = self._create_connection(
        address, self.timeout, self.source_address)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

    if not self._tunnel_host:
        return
    self._tunnel()
Greg Stein5e0fa402000-06-26 08:28:01 +0000946
def close(self):
    """Close the connection to the HTTP server.

    The state goes back to idle, the socket (if any) is closed, and
    any pending response is closed as well, even when closing the
    socket raises.
    """
    self.__state = _CS_IDLE
    try:
        current_sock = self.sock
        if current_sock:
            self.sock = None
            current_sock.close()   # close it manually... there may be other refs
    finally:
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000960
def send(self, data):
    """Send `data' to the server.

    ``data`` can be a string object, a bytes object, an array object, a
    file-like object that supports a .read() method, or an iterable object.

    A connection is opened first when none exists and auto_open is
    set; otherwise NotConnected is raised.  TypeError is raised when
    ``data`` is neither bytes-like nor iterable.
    """

    if self.sock is None:
        if not self.auto_open:
            raise NotConnected()
        self.connect()

    if self.debuglevel > 0:
        print("send:", repr(data))

    if hasattr(data, "read"):
        # File-like object: stream it out block by block.
        if self.debuglevel > 0:
            print("sendIng a read()able")
        needs_encoding = self._is_textIO(data)
        if needs_encoding and self.debuglevel > 0:
            print("encoding file using iso-8859-1")
        while block := data.read(self.blocksize):
            if needs_encoding:
                block = block.encode("iso-8859-1")
            sys.audit("http.client.send", self, block)
            self.sock.sendall(block)
        return

    sys.audit("http.client.send", self, data)
    try:
        self.sock.sendall(data)
    except TypeError:
        # Not bytes-like: fall back to sending an iterable piecewise.
        if not isinstance(data, collections.abc.Iterable):
            raise TypeError("data should be a bytes-like object "
                            "or an iterable, got %r" % type(data))
        for piece in data:
            self.sock.sendall(piece)
Greg Stein5e0fa402000-06-26 08:28:01 +00001000
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001001 def _output(self, s):
1002 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +00001003
Jeremy Hyltone3252ec2002-07-16 21:41:43 +00001004 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001005 """
1006 self._buffer.append(s)
1007
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001008 def _read_readable(self, readable):
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001009 if self.debuglevel > 0:
1010 print("sendIng a read()able")
1011 encode = self._is_textIO(readable)
1012 if encode and self.debuglevel > 0:
1013 print("encoding file using iso-8859-1")
1014 while True:
Nir Sofferad455cd2017-11-06 23:16:37 +02001015 datablock = readable.read(self.blocksize)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001016 if not datablock:
1017 break
1018 if encode:
1019 datablock = datablock.encode("iso-8859-1")
1020 yield datablock
1021
1022 def _send_output(self, message_body=None, encode_chunked=False):
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001023 """Send the currently buffered request and clear the buffer.
1024
Jeremy Hyltone3252ec2002-07-16 21:41:43 +00001025 Appends an extra \\r\\n to the buffer.
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001026 A message_body may be specified, to be appended to the request.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001027 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +00001028 self._buffer.extend((b"", b""))
1029 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001030 del self._buffer[:]
1031 self.send(msg)
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001032
Benjamin Peterson822b21c2009-01-18 00:04:57 +00001033 if message_body is not None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001034
1035 # create a consistent interface to message_body
1036 if hasattr(message_body, 'read'):
1037 # Let file-like take precedence over byte-like. This
1038 # is needed to allow the current position of mmap'ed
1039 # files to be taken into account.
1040 chunks = self._read_readable(message_body)
1041 else:
1042 try:
1043 # this is solely to check to see if message_body
1044 # implements the buffer API. it /would/ be easier
1045 # to capture if PyObject_CheckBuffer was exposed
1046 # to Python.
1047 memoryview(message_body)
1048 except TypeError:
1049 try:
1050 chunks = iter(message_body)
1051 except TypeError:
1052 raise TypeError("message_body should be a bytes-like "
1053 "object or an iterable, got %r"
1054 % type(message_body))
1055 else:
1056 # the object implements the buffer interface and
1057 # can be passed directly into socket methods
1058 chunks = (message_body,)
1059
1060 for chunk in chunks:
1061 if not chunk:
1062 if self.debuglevel > 0:
1063 print('Zero length chunk ignored')
1064 continue
1065
1066 if encode_chunked and self._http_vsn == 11:
1067 # chunked encoding
Eric V. Smith451d0e32016-09-09 21:56:20 -04001068 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001069 + b'\r\n'
1070 self.send(chunk)
1071
1072 if encode_chunked and self._http_vsn == 11:
1073 # end chunked transfer
1074 self.send(b'0\r\n\r\n')
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001075
def putrequest(self, method, url, skip_host=False,
               skip_accept_encoding=False):
    """Start a request by buffering its request line.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True suppresses the automatic 'Host:' header.
    `skip_accept_encoding' if True suppresses the automatic
    'Accept-Encoding:' header.

    Raises CannotSendRequest unless the connection is idle.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # In certain cases, we cannot issue another request on this
    # connection.  This occurs when:
    #   1) we are in the process of sending a request (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is
    #      going to close the connection upon completion.
    #   3) the headers for the previous response have not been read,
    #      thus we cannot determine whether point (2) is true
    #      (_CS_REQ_SENT).
    #
    # If there is no prior response, then we can request at will.
    #
    # If point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new
    #       request.  We are not allowed to begin fetching the response
    #       to this new request, however, until that prior response is
    #       complete.
    if self.__state != _CS_IDLE:
        raise CannotSendRequest(self.__state)
    self.__state = _CS_REQ_STARTED

    self._validate_method(method)

    # Save the method for use later in the response phase
    self._method = method

    url = url or '/'
    self._validate_path(url)

    request_line = '%s %s %s' % (method, url, self._http_vsn_str)
    self._output(self._encode_request(request_line))

    if self._http_vsn != 11:
        # For HTTP/1.0, the server will assume "not chunked"
        return

    # Issue some standard headers for better HTTP/1.1 compliance

    if not skip_host:
        # This header is issued *only* for HTTP/1.1 connections.
        # Backwards-compat clients will be using HTTP/1.0 and may be
        # issuing this header themselves.  We should NOT issue it
        # twice; some web servers (such as Apache) barf when they see
        # two Host: headers.

        # If we need a non-standard port, include it in the header.
        # If the request is going through a proxy, use the host of
        # the actual URL, not the host of the proxy.

        netloc = ''
        if url.startswith('http'):
            netloc = urlsplit(url).netloc

        if netloc:
            try:
                netloc_enc = netloc.encode("ascii")
            except UnicodeEncodeError:
                netloc_enc = netloc.encode("idna")
            self.putheader('Host', netloc_enc)
        else:
            if self._tunnel_host:
                host, port = self._tunnel_host, self._tunnel_port
            else:
                host, port = self.host, self.port

            try:
                host_enc = host.encode("ascii")
            except UnicodeEncodeError:
                host_enc = host.encode("idna")

            # An IPv6 address literal is wrapped in [] when used as
            # the Host header (RFC 2732).
            if ':' in host:
                host_enc = b'[' + host_enc + b']'

            if port == self.default_port:
                self.putheader('Host', host_enc)
            else:
                host_enc = host_enc.decode("ascii")
                self.putheader('Host', "%s:%s" % (host_enc, port))

    # note: we are assuming that clients will not attempt to set these
    #       headers since *this* library must deal with the
    #       consequences. this also means that when the supporting
    #       libraries are updated to recognize other forms, then this
    #       code should be changed (removed or updated).

    # we only want a Content-Encoding of "identity" since we don't
    # support encodings such as x-gzip or x-deflate.
    if not skip_accept_encoding:
        self.putheader('Accept-Encoding', 'identity')

    # We can accept "chunked" Transfer-Encodings, but no others.
    # NOTE: no TE header is sent; its absence implies *only* "chunked".
    # If a TE header were supplied, it would also have to be named in
    # a Connection header.
Greg Stein5e0fa402000-06-26 08:28:01 +00001202
Jason R. Coombs7774d782019-09-28 08:32:01 -04001203 def _encode_request(self, request):
1204 # ASCII also helps prevent CVE-2019-9740.
1205 return request.encode('ascii')
1206
def _validate_method(self, method):
    """Validate a method name for putrequest.

    Raises ValueError when the name contains a disallowed character;
    this prevents http header injection.
    """
    offending = _contains_disallowed_method_pchar_re.search(method)
    if not offending:
        return
    raise ValueError(
        f"method can't contain control characters. {method!r} "
        f"(found at least {offending.group()!r})")
1215
def _validate_path(self, url):
    """Validate a url for putrequest.

    Raises InvalidURL when the url contains a disallowed character;
    this prevents CVE-2019-9740.
    """
    offending = _contains_disallowed_url_pchar_re.search(url)
    if not offending:
        return
    raise InvalidURL(f"URL can't contain control characters. {url!r} "
                     f"(found at least {offending.group()!r})")
1223
def _validate_host(self, host):
    """Validate a host so it doesn't contain control characters.

    Raises InvalidURL when a disallowed character is found; this
    prevents CVE-2019-18348.
    """
    offending = _contains_disallowed_url_pchar_re.search(host)
    if not offending:
        return
    raise InvalidURL(f"URL can't contain control characters. {host!r} "
                     f"(found at least {offending.group()!r})")
1231
def putheader(self, header, *values):
    """Buffer one request header line.

    For example: h.putheader('Accept', 'text/html')

    Several values are emitted as one folded header, joined with
    CRLF and a tab.  Raises CannotSendHeader unless a request has
    been started, and ValueError for an invalid header name or value.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')

    if not _is_legal_header_name(header):
        raise ValueError('Invalid header name %r' % (header,))

    encoded_values = []
    for raw in values:
        if hasattr(raw, 'encode'):
            raw = raw.encode('latin-1')
        elif isinstance(raw, int):
            raw = str(raw).encode('ascii')

        if _is_illegal_header_value(raw):
            raise ValueError('Invalid header value %r' % (raw,))
        encoded_values.append(raw)

    self._output(header + b': ' + b'\r\n\t'.join(encoded_values))
Greg Stein5e0fa402000-06-26 08:28:01 +00001259
def endheaders(self, message_body=None, *, encode_chunked=False):
    """Indicate that the last header line has been sent to the server.

    This method sends the request to the server.  The optional
    message_body argument can be used to pass a message body
    associated with the request.

    Raises CannotSendHeader unless a request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001272
    # NOTE: the shared ``headers={}`` default is only iterated by
    # _send_request(), never mutated, so it does not leak between calls.
    def request(self, method, url, body=None, headers={}, *,
                encode_chunked=False):
        """Send a complete request to the server.

        All arguments are passed unchanged to _send_request(), which
        issues the request line via putrequest(), sends each item of the
        headers mapping via putheader(), and finishes with endheaders().

        body may be None; a str body is encoded by _send_request()
        before it is sent.  encode_chunked is keyword-only.
        """
        self._send_request(method, url, body, headers, encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001277
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001278 def _send_request(self, method, url, body, headers, encode_chunked):
Jeremy Hylton636950f2009-03-28 04:34:21 +00001279 # Honor explicitly requested Host: and Accept-Encoding: headers.
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001280 header_names = frozenset(k.lower() for k in headers)
Jeremy Hylton2c178252004-08-07 16:28:14 +00001281 skips = {}
1282 if 'host' in header_names:
1283 skips['skip_host'] = 1
1284 if 'accept-encoding' in header_names:
1285 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +00001286
Jeremy Hylton2c178252004-08-07 16:28:14 +00001287 self.putrequest(method, url, **skips)
1288
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001289 # chunked encoding will happen if HTTP/1.1 is used and either
1290 # the caller passes encode_chunked=True or the following
1291 # conditions hold:
1292 # 1. content-length has not been explicitly set
Martin Panteref91bb22016-08-27 01:39:26 +00001293 # 2. the body is a file or iterable, but not a str or bytes-like
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001294 # 3. Transfer-Encoding has NOT been explicitly set by the caller
1295
R David Murraybeed8402015-03-22 15:18:23 -04001296 if 'content-length' not in header_names:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001297 # only chunk body if not explicitly set for backwards
1298 # compatibility, assuming the client code is already handling the
1299 # chunking
1300 if 'transfer-encoding' not in header_names:
1301 # if content-length cannot be automatically determined, fall
1302 # back to chunked encoding
1303 encode_chunked = False
1304 content_length = self._get_content_length(body, method)
1305 if content_length is None:
Martin Panteref91bb22016-08-27 01:39:26 +00001306 if body is not None:
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001307 if self.debuglevel > 0:
1308 print('Unable to determine size of %r' % body)
1309 encode_chunked = True
1310 self.putheader('Transfer-Encoding', 'chunked')
1311 else:
1312 self.putheader('Content-Length', str(content_length))
1313 else:
1314 encode_chunked = False
1315
Guido van Rossumcc2b0162007-02-11 06:12:03 +00001316 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +00001317 self.putheader(hdr, value)
Jeremy Hyltonef9f48e2009-03-26 22:04:05 +00001318 if isinstance(body, str):
Jeremy Hylton236654b2009-03-27 20:24:34 +00001319 # RFC 2616 Section 3.7.1 says that text default has a
1320 # default charset of iso-8859-1.
Martin Panter44391482016-02-09 10:20:52 +00001321 body = _encode(body, 'body')
Martin Panter3c0d0ba2016-08-24 06:33:33 +00001322 self.endheaders(body, encode_chunked=encode_chunked)
Greg Stein5e0fa402000-06-26 08:28:01 +00001323
Greg Steindd6eefb2000-07-18 09:09:48 +00001324 def getresponse(self):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001325 """Get the response from the server.
1326
1327 If the HTTPConnection is in the correct state, returns an
1328 instance of HTTPResponse or of whatever object is returned by
Martin Pantercc71a792016-04-05 06:19:42 +00001329 the response_class variable.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001330
1331 If a request has not been sent or if a previous response has
1332 not be handled, ResponseNotReady is raised. If the HTTP
1333 response indicates that the connection should be closed, then
1334 it will be closed before the response is returned. When the
1335 connection is closed, the underlying socket is closed.
1336 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001337
Greg Stein616a58d2003-06-24 06:35:19 +00001338 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +00001339 if self.__response and self.__response.isclosed():
1340 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001341
Greg Steindd6eefb2000-07-18 09:09:48 +00001342 # if a prior response exists, then it must be completed (otherwise, we
1343 # cannot read this response's header to determine the connection-close
1344 # behavior)
1345 #
1346 # note: if a prior response existed, but was connection-close, then the
1347 # socket and response were made independent of this HTTPConnection
1348 # object since a new request requires that we open a whole new
1349 # connection
1350 #
1351 # this means the prior response had one of two states:
1352 # 1) will_close: this connection was reset and the prior socket and
1353 # response operate independently
1354 # 2) persistent: the response was retained and we await its
1355 # isclosed() status to become true.
1356 #
1357 if self.__state != _CS_REQ_SENT or self.__response:
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001358 raise ResponseNotReady(self.__state)
Greg Stein5e0fa402000-06-26 08:28:01 +00001359
Jeremy Hylton30f86742000-09-18 22:50:38 +00001360 if self.debuglevel > 0:
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001361 response = self.response_class(self.sock, self.debuglevel,
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +00001362 method=self._method)
Jeremy Hylton30f86742000-09-18 22:50:38 +00001363 else:
Antoine Pitrou988dbd72010-12-17 17:35:56 +00001364 response = self.response_class(self.sock, method=self._method)
Greg Stein5e0fa402000-06-26 08:28:01 +00001365
Serhiy Storchakab491e052014-12-01 13:07:45 +02001366 try:
R David Murraycae7bdb2015-04-05 19:26:29 -04001367 try:
1368 response.begin()
1369 except ConnectionError:
1370 self.close()
1371 raise
Serhiy Storchakab491e052014-12-01 13:07:45 +02001372 assert response.will_close != _UNKNOWN
1373 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +00001374
Serhiy Storchakab491e052014-12-01 13:07:45 +02001375 if response.will_close:
1376 # this effectively passes the connection to the response
1377 self.close()
1378 else:
1379 # remember this, so we can tell when it is complete
1380 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +00001381
Serhiy Storchakab491e052014-12-01 13:07:45 +02001382 return response
1383 except:
1384 response.close()
1385 raise
Greg Stein5e0fa402000-06-26 08:28:01 +00001386
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """This class allows communication via SSL.

        The socket opened by HTTPConnection.connect() is wrapped with an
        SSL context: either the one passed as ``context`` or a default
        HTTPS context created in __init__.
        """

        default_port = HTTPS_PORT

        # key_file, cert_file and check_hostname are deprecated in favour
        # of context; passing any of them emits a DeprecationWarning.

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Set up the connection and the SSL context it will use.

            Raises ValueError if hostname checking is requested while the
            context's verify_mode is ssl.CERT_NONE.
            """
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)

            legacy_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in legacy_args):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file

            if context is None:
                context = ssl._create_default_https_context()
                # send ALPN extension to indicate HTTP/1.1 protocol
                if self._http_vsn == 11:
                    context.set_alpn_protocols(['http/1.1'])
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            verifies_peer = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                check_hostname = context.check_hostname
            if check_hostname and not verifies_peer:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")

            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True

            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port."""
            super().connect()

            # When tunnelling, the tunnel target is the name passed as
            # server_hostname; otherwise the connection's own host is.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
Greg Stein5e0fa402000-06-26 08:28:01 +00001452
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +00001457
class NotConnected(HTTPException):
    """HTTPException subclass that adds no state or behaviour."""
    # NOTE(review): presumably raised when the connection is used without
    # an open socket; the raise sites are outside this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001460
class InvalidURL(HTTPException):
    """HTTPException subclass that adds no state or behaviour."""
    # NOTE(review): presumably raised for a malformed URL or host/port;
    # the raise sites are outside this section -- confirm.
1463
class UnknownProtocol(HTTPException):
    """HTTPException carrying a protocol version.

    The version is available as the ``version`` attribute and is also
    the sole element of ``args``.
    """

    def __init__(self, version):
        self.version = version
        # args is set by hand because Exception.__init__ is not called.
        self.args = (version,)
Greg Stein5e0fa402000-06-26 08:28:01 +00001468
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no state or behaviour."""
    # NOTE(review): presumably for an unsupported Transfer-Encoding value;
    # no raise site is visible in this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001471
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no state or behaviour."""
    # NOTE(review): no raise site is visible in this section; the intended
    # use is not evident from here -- confirm before relying on it.
Greg Stein5e0fa402000-06-26 08:28:01 +00001474
class IncompleteRead(HTTPException):
    """HTTPException recording a read that ended early.

    ``partial`` holds what was read (its length is reported as "bytes
    read"); ``expected``, when not None, is reported as the number of
    additional bytes that were expected.
    """

    def __init__(self, partial, expected=None):
        # args is set by hand because Exception.__init__ is not called.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        remaining = ('' if self.expected is None
                     else ', %i more expected' % self.expected)
        return '%s(%i bytes read%s)' % (
            self.__class__.__name__, len(self.partial), remaining)

    # object.__str__ delegates to repr(), so str() shows the byte counts
    # instead of the raw partial data held in args.
    __str__ = object.__str__
Greg Stein5e0fa402000-06-26 08:28:01 +00001488
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001491
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state or behaviour."""
    # NOTE(review): presumably raised by putrequest() when a new request
    # may not be started; the raise site is outside this section -- confirm.
Greg Stein5e0fa402000-06-26 08:28:01 +00001494
class CannotSendHeader(ImproperConnectionState):
    """Raised by endheaders() when the connection is not in the
    request-started state."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001497
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or a
    previous response has not been completed."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001500
class BadStatusLine(HTTPException):
    """HTTPException carrying a status line.

    The line is stored as ``line`` and as the sole element of ``args``.
    A falsy line is stored as its repr() so that it stays visible in
    messages.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        # args is set by hand because Exception.__init__ is not called.
        self.args = (shown,)
        self.line = shown
Greg Stein5e0fa402000-06-26 08:28:01 +00001507
class LineTooLong(HTTPException):
    """HTTPException whose message reports that more than _MAXLINE bytes
    were received while reading the named kind of line."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1512
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Both a ConnectionResetError and a BadStatusLine.

    Handlers written for either exception type catch it.
    """

    def __init__(self, *args, **kwargs):
        # Order matters: BadStatusLine sets ``line`` for an empty status
        # line, then ConnectionResetError is initialised last with the
        # caller's arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *args, **kwargs)
1517
# Backwards-compatibility alias: ``error`` is the same class object as
# HTTPException.
error = HTTPException