blob: fd66cfd3c9467210cae3711417a3ea5f81bf3f3b [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006HTTPConnection goes through a number of "states", which define when a client
Greg Stein5e0fa402000-06-26 08:28:01 +00007may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000070import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000071import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000072import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000073from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000074
# Public names exported by "from <this module> import *".  Keep this list
# in sync with the classes, exceptions and tables the module defines.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
# default TCP ports; HTTP_PORT is HTTPConnection.default_port
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes until the response
# status line and headers have been parsed
_UNKNOWN = 'UNKNOWN'

# connection states (kept in HTTPConnection.__state; the module docstring
# tabulates how they combine with __response into the logical states)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
151
# Mapping status codes to their standard reason phrases.
#
# Every status-code constant defined by this module has an entry here, so
# that e.g. responses[PROCESSING] or responses[MULTI_STATUS] does not raise
# KeyError.  306 has no constant (it is reserved/unused) but is kept for
# completeness.  Keys are written as literal integers so that the table can
# be read without cross-referencing the constant names.
responses = {
    # informational
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    # successful
    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    # redirection
    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    # client error
    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    # server error
    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}
200
# maximal amount of data to read at one time in _safe_read
# (1048576 bytes, i.e. 1 MiB); HTTPResponse._safe_read() never asks the
# underlying file for more than this in a single read() call.
MAXAMOUNT = 1048576
203
class HTTPMessage(mimetools.Message):
    """Header container that folds repeated HTTP header fields together."""

    def addheader(self, key, value):
        """Record value for key, comma-joining it onto an earlier value."""
        existing = self.dict.get(key)
        if existing is not None:
            value = ", ".join((existing, value))
        self.dict[key] = value

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the value of key."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Parse the header block from self.fp.

        Lines are consumed up to and including the blank line that ends
        the headers; that terminator is dropped.  When a line that is not
        a header shows up instead (an error), the method tries to push it
        back onto the file; such a line is never recorded.

        Results are left on the instance:

        self.status   -- '' on success, otherwise an error message
        self.headers  -- the raw header lines, exactly as read
        self.dict     -- field name -> value

        Repeated fields are merged as described in RFC 2616 sec 4.2: each
        later field-value is appended to the first, separated by a comma,
        preserving the order in which the fields were received.
        """
        # XXX This overrides rfc822.Message.readheaders().  The base class
        # offers no hook for customizing the merge behaviour, so this is a
        # copy of the base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        field = ""
        at_first_line = True
        line_start = push_back = where = None
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            where = self.fp.tell

        while True:
            if where:
                # Remember where this line starts so it can be re-read.
                try:
                    line_start = where()
                except IOError:
                    line_start = where = None
                    self.seekable = 0

            line = str(self.fp.readline(), "iso-8859-1")
            if not line:
                self.status = 'EOF in headers'
                break

            # Skip unix From name time lines
            if at_first_line and line.startswith('From '):
                self.unixfrom += line
                continue
            at_first_line = False

            if field and line[0] in ' \t':
                # Continuation of the previous header.
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                raw_lines.append(line)
                self.addcontinue(field, line.strip())
                continue

            if self.iscomment(line):
                # Comments are ignored.
                continue

            if self.islast(line):
                # Note! No pushback here! The delimiter line gets eaten.
                break

            field = self.isheader(line)
            if field:
                # A legal header line: keep it.
                raw_lines.append(line)
                self.addheader(field, line[len(field)+1:].strip())
                continue

            # Not a header line: report it, hand it back, and stop.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'

            # Try to undo the read.
            if push_back:
                push_back(line)
            elif where:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
class HTTPResponse:
    """One HTTP response read from a socket.

    The object wraps the socket in an unbuffered binary file (self.fp).
    begin() parses the status line and headers; read() then returns the
    body as bytes, honouring chunked transfer encoding and Content-Length.
    Once the whole body has been consumed the response closes its file,
    after which isclosed() is true and read() returns b"".
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Wrap sock for reading; nothing is read until begin() is called.

        sock       -- connected socket; only its makefile() method is used
        debuglevel -- values > 0 make the parser print what it reads
        strict     -- if true, an unparsable status line raises
                      BadStatusLine instead of being taken as HTTP/0.9
        method     -- request method; "HEAD" means the response has no body
        """
        # XXX If the response includes a content-length header, we
        # need to make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  (This only
        # applies to HTTP/1.1 connections.)  Since some clients access
        # self.fp directly rather than calling read(), this is a little
        # tricky.
        self.fp = sock.makefile("rb", 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line.

        Returns a (version, status, reason) tuple where status is an int.
        Raises BadStatusLine when the line is empty, when the status code
        is not a number in 100..999, or (in strict mode only) when the
        line does not start with "HTTP/".
        """
        # Keep the raw bytes around: they are needed verbatim if this
        # turns out to be an HTTP/0.9 Simple-Response.
        raw = self.fp.readline()
        line = str(raw, "iso-8859-1")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith("HTTP/"):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # Assume it's a Simple-Response from an 0.9 server.
                # The line just consumed is really the start of the body,
                # so put the original bytes back in front of the file;
                # self.fp.readline() needs to return bytes.
                self.fp = LineAndFileWrapper(raw, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body reading.

        Fills in status, reason, version, msg, chunked, length and
        will_close.  Calling it again after the headers were read is a
        no-op.  Raises UnknownProtocol for an unsupported HTTP version and
        propagates BadStatusLine from _read_status().
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.status = status
        self.reason = reason.strip()
        if version == "HTTP/1.0":
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == "HTTP/0.9":
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: everything is body and the
            # end of the body is the end of the connection.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(io.BytesIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print("header:", hdr, end=" ")

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.msg.getheader("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # Unparsable Content-Length: treat it as absent.
                pass
            else:
                if self.length < 0:
                    # A negative length is meaningless; treat it as absent
                    # rather than letting it reach fp.read().
                    self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = 1

    def _check_close(self):
        """Return True if the server will close the connection afterwards."""
        conn = self.msg.getheader("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    @property
    def closed(self):
        """True once the response's file has been closed."""
        return self.isclosed()

    def flush(self):
        """Flush the underlying file; a no-op once the response is closed."""
        # read() closes the response by itself when the body is exhausted,
        # so a wrapper may legitimately call flush() after fp is gone.
        if self.fp is not None:
            self.fp.flush()

    # End of "raw stream" methods

    def isclosed(self):
        """Return True if the response's file has been closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to amt bytes of the body (all of it if None).

        Always returns bytes; b"" is returned once the response is closed.
        The response closes itself as soon as the whole body (per chunked
        encoding or Content-Length) has been consumed.
        """
        if self.fp is None:
            return b""

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                data = self.fp.read()
            else:
                data = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return data

        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        data = self.fp.read(amt)
        if self.length is not None:
            self.length -= len(data)
            if not self.length:
                # Content-Length exhausted: close now so that isclosed()
                # reports the response as finished without needing one
                # more (empty) read.
                self.close()
        return data

    def _read_chunked(self, amt):
        """Read up to amt bytes (or everything) of a chunked body.

        self.chunk_left carries the unread remainder of the current chunk
        between calls.  Raises ValueError if a chunk-size line is not a
        hexadecimal number and IncompleteRead if the data ends early.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        parts = []      # collected pieces; joined once when returning

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b";")
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b"".join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b"".join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # EOF before the terminator: nothing more will arrive, so
                # stop instead of spinning on empty reads.
                break
            if line == b"\r\n":
                break

        # we read everything; close the "file"
        self.close()

        return b"".join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation, carrying the bytes that were read so far.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        parts = []
        while amt > 0:
            # Never request more than MAXAMOUNT from the file at once.
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(b"".join(parts))
            parts.append(chunk)
            amt -= len(chunk)
        return b"".join(parts)

    def getheader(self, name, default=None):
        """Return the value of header name, or default if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000638
Greg Stein5e0fa402000-06-26 08:28:01 +0000639
640class HTTPConnection:
641
    # HTTP version used by this client, as a number and as a string.
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class-level defaults; instances or subclasses may override them.
    response_class = HTTPResponse   # the <response_class> of the module docstring's state table
    default_port = HTTP_PORT        # used by _set_hostport() when no port is given
    auto_open = 1                   # if true, send() calls connect() when there is no socket
    debuglevel = 0                  # send() prints debugging output when > 0
    strict = 0                      # replaced per instance when __init__ gets strict != None
Greg Stein5e0fa402000-06-26 08:28:01 +0000650
Guido van Rossumd8faa362007-04-27 19:54:29 +0000651 def __init__(self, host, port=None, strict=None, timeout=None):
652 self.timeout = timeout
Greg Steindd6eefb2000-07-18 09:09:48 +0000653 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000654 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000655 self.__response = None
656 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000657 self._method = None
Tim Petersc411dba2002-07-16 21:35:23 +0000658
Greg Steindd6eefb2000-07-18 09:09:48 +0000659 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000660 if strict is not None:
661 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000662
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 def _set_hostport(self, host, port):
664 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000665 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000666 j = host.rfind(']') # ipv6 addresses have [...]
667 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000668 try:
669 port = int(host[i+1:])
670 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000671 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000672 host = host[:i]
673 else:
674 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000675 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000676 host = host[1:-1]
Greg Steindd6eefb2000-07-18 09:09:48 +0000677 self.host = host
678 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000679
    def set_debuglevel(self, level):
        """Set the debugging level.

        send() prints what it transmits when the level is greater than 0.
        """
        self.debuglevel = level
682
Greg Steindd6eefb2000-07-18 09:09:48 +0000683 def connect(self):
684 """Connect to the host and port specified in __init__."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000685 self.sock = socket.create_connection((self.host,self.port),
686 self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000687
Greg Steindd6eefb2000-07-18 09:09:48 +0000688 def close(self):
689 """Close the connection to the HTTP server."""
690 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000691 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000692 self.sock = None
693 if self.__response:
694 self.__response.close()
695 self.__response = None
696 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000697
def send(self, str):
    """Send `str' to the server.

    `str' is either a payload handed to the socket's sendall() in one
    call, or an object with a read() method, which is drained in
    8192-byte blocks until read() returns something false.

    If no socket is open, one is opened first when `auto_open' is true;
    otherwise NotConnected is raised.

    socket.error is always propagated to the caller.  On a broken pipe
    the connection is closed first, so that the next send reconnects;
    the caller knows whether a retry is safe.
    """
    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        if hasattr(str, 'read'):
            if self.debuglevel > 0:
                print("sendIng a read()able")
            data = str.read(blocksize)
            while data:
                self.sock.sendall(data)
                data = str.read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        # Compare against the platform's EPIPE rather than a literal 32,
        # and tolerate exceptions raised without arguments so that the
        # original error is what the caller sees.
        if v.args and v.args[0] == errno.EPIPE:
            self.close()
        raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000727
def _output(self, s):
    """Queue one line of the request in the pending-output buffer.

    The line must *not* already end with \\r\\n; line terminators are
    added when the buffer is joined and sent.
    """
    pending = self._buffer
    pending.append(s)
734
def _send_output(self):
    """Send the currently buffered request and clear the buffer.

    The buffered lines are joined with \\r\\n and followed by an extra
    \\r\\n, i.e. the blank line that ends the header section.
    """
    lines = self._buffer
    # Two empty trailing items make join() terminate the last line and
    # then emit the blank line.
    lines.extend([b"", b""])
    payload = b"\r\n".join(lines)
    lines[:] = []
    self.send(payload)
744
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
    'Accept-Encoding:' header

    The request line is only buffered here; it goes out on the wire
    when endheaders() is called.

    Raises CannotSendRequest if the connection is not idle.
    """

    def host_text(name):
        # Host names must be ASCII on the wire; non-ASCII names are
        # IDNA-encoded.  The result is kept as a str because putheader()
        # builds the header line with %-formatting, and formatting a
        # bytes object there would embed its repr (b'...').
        try:
            name.encode("ascii")
        except UnicodeEncodeError:
            return name.encode("idna").decode("ascii")
        return name

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state == _CS_IDLE:
        self.__state = _CS_REQ_STARTED
    else:
        raise CannotSendRequest()

    # Save the method we use, we need it later in the response phase
    self._method = method
    if not url:
        url = '/'
    request = '%s %s %s' % (method, url, self._http_vsn_str)

    # Non-ASCII characters should have been eliminated earlier
    self._output(request.encode('ascii'))

    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # put the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                nil, netloc, nil, nil, nil = urlsplit(url)

            if netloc:
                self.putheader('Host', host_text(netloc))
            else:
                host = host_text(self.host)
                if self.port == HTTP_PORT:
                    self.putheader('Host', host)
                else:
                    self.putheader('Host', "%s:%s" % (host, self.port))

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000852
def putheader(self, header, value):
    """Send a request header line to the server.

    For example: h.putheader('Accept', 'text/html')

    `value' is formatted into the header line with %s.  An ASCII bytes
    value is decoded first, so that it is not rendered as its repr.
    The resulting line must be encodable as ASCII.

    Raises CannotSendHeader unless a request has been started with
    putrequest() and its headers have not been ended yet.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    # '%s' % b'x' yields "b'x'" on Python 3, which would corrupt the
    # header on the wire.
    if isinstance(value, bytes):
        value = value.decode('ascii')

    header = '%s: %s' % (header, value)
    self._output(header.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000863
def endheaders(self):
    """Indicate that the last header line has been sent to the server.

    Marks the request as sent and flushes the buffered request line and
    headers.  Raises CannotSendHeader if no request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT

    self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000873
def request(self, method, url, body=None, headers={}):
    """Send a complete request to the server.

    `headers' maps header names to values; it is only read here, never
    modified.  `body', if given, is sent after the headers.

    If sending fails with a broken pipe and `auto_open' is true, the
    request is sent one more time.  Any other socket.error, and any
    error from the second attempt, is propagated.
    """
    try:
        self._send_request(method, url, body, headers)
    except socket.error as v:
        # trap 'Broken pipe' if we're allowed to automatically reconnect.
        # Use the platform's EPIPE rather than a literal 32, and do not
        # index into empty args, which would hide the real error.
        broken_pipe = bool(v.args) and v.args[0] == errno.EPIPE
        if not broken_pipe or not self.auto_open:
            raise
        # try one more time
        self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000884
def _send_request(self, method, url, body, headers):
    """Issue the request line, the headers and the optional body.

    Host and Accept-Encoding headers supplied by the caller suppress
    the ones putrequest() would add.  When a body is given without a
    Content-Length header, the length is taken from len(body) or, for
    objects without a length, from fstat() on body.fileno(); if neither
    works no Content-Length header is sent.
    """
    # honour explicitly requested Host: and Accept-Encoding headers
    supplied = {name.lower() for name in headers}
    skips = {}
    if 'host' in supplied:
        skips['skip_host'] = 1
    if 'accept-encoding' in supplied:
        skips['skip_accept_encoding'] = 1

    self.putrequest(method, url, **skips)

    if body and 'content-length' not in supplied:
        thelen = None
        try:
            thelen = str(len(body))
        except TypeError:
            # If this is a file-like object, try to
            # fstat its file descriptor
            import os
            try:
                thelen = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # Don't send a length if this failed
                if self.debuglevel > 0:
                    print("Cannot stat!!")

        if thelen is not None:
            self.putheader('Content-Length', thelen)

    for hdr, value in headers.items():
        self.putheader(hdr, value)
    self.endheaders()

    if body:
        if isinstance(body, str):
            body = body.encode('ascii')
        self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000919
def getresponse(self):
    """Get the response from the server.

    Builds a `response_class' object on the connection's socket, reads
    its status line and headers via begin(), and returns it.

    Raises ResponseNotReady if no request has been fully sent or if a
    previous response on this connection has not been completed.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    #
    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    # The debug level is passed positionally, and only when it is enabled.
    positional = (self.sock,)
    if self.debuglevel > 0:
        positional = positional + (self.debuglevel,)
    response = self.response_class(*positional, strict=self.strict,
                                   method=self._method)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if response.will_close:
        # this effectively passes the connection to the response
        self.close()
    else:
        # remember this, so we can tell when it is complete
        self.__response = response

    return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000966
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000967# The next several classes are used to define FakeSocket, a socket-like
Jeremy Hylton29d27ac2002-07-09 21:22:36 +0000968# interface to an SSL connection.
969
970# The primary complexity comes from faking a makefile() method. The
971# standard socket makefile() implementation calls dup() on the socket
972# file descriptor. As a consequence, clients can call close() on the
973# parent socket and its makefile children in any order. The underlying
974# socket isn't closed until they are all closed.
975
976# The implementation uses reference counting to keep the socket open
977# until the last client calls close(). SharedSocket keeps track of
978# the reference counting and SharedSocketClient provides a constructor
979# and close() method that call incref() and decref() correctly.
980
class SharedSocket:
    """Reference-counted holder for a socket used by several clients.

    The wrapped socket is closed when the count drops back to zero, and
    again when this holder itself is finalized.
    """

    def __init__(self, sock):
        self.sock = sock
        self._refcnt = 0

    def incref(self):
        """Register one more client of the socket."""
        self._refcnt = self._refcnt + 1

    def decref(self):
        """Release one client; close the socket when none are left."""
        self._refcnt = self._refcnt - 1
        assert self._refcnt >= 0
        if not self._refcnt:
            self.sock.close()

    def __del__(self):
        self.sock.close()
998
class SharedSocketClient:
    """Base class for objects that hold one reference on a SharedSocket.

    The constructor takes the reference; close() gives it back exactly
    once, however often it is called.
    """

    def __init__(self, shared):
        self._closed = 0
        self._shared = shared
        self._shared.incref()
        self._sock = shared.sock

    def close(self):
        """Release this client's reference on the shared socket."""
        if self._closed:
            return
        self._shared.decref()
        self._closed = 1
        self._shared = None
1011 self._shared = None
1012
class SSLFile(SharedSocketClient):
    """File-like object wrapping an SSL socket.

    Data is pulled from the SSL object in chunks of `bufsize' bytes
    (default BUFSIZE).  Bytes read past what a caller asked for are kept
    in an internal buffer for the next read.  All data is bytes.
    """

    BUFSIZE = 8192

    def __init__(self, sock, ssl, bufsize=None):
        SharedSocketClient.__init__(self, sock)
        self._ssl = ssl
        self._buf = b""
        self._bufsize = bufsize or self.__class__.BUFSIZE

    def _read(self):
        """Read one chunk from the SSL object; return b"" at end of data."""
        buf = b""
        # put in a loop so that we retry on transient errors
        while True:
            try:
                buf = self._ssl.read(self._bufsize)
            except socket.sslerror as err:
                err_type = err.args[0]
                if (err_type == socket.SSL_ERROR_WANT_READ
                    or err_type == socket.SSL_ERROR_WANT_WRITE):
                    continue
                if (err_type == socket.SSL_ERROR_ZERO_RETURN
                    or err_type == socket.SSL_ERROR_EOF):
                    break
                raise
            except socket.error as err:
                err_type = err.args[0]
                if err_type == errno.EINTR:
                    continue
                if err_type == errno.EBADF:
                    # XXX socket was closed?
                    break
                raise
            else:
                break
        return buf

    def read(self, size=None):
        """Return up to `size' bytes, or everything until EOF if None."""
        chunks = [self._buf]
        avail = len(self._buf)
        while size is None or avail < size:
            s = self._read()
            if s == b"":
                break
            chunks.append(s)
            avail += len(s)
        data = b"".join(chunks)
        if size is None:
            self._buf = b""
            return data
        else:
            self._buf = data[size:]
            return data[:size]

    def readline(self):
        """Return one line including its newline, or the rest at EOF."""
        chunks = [self._buf]
        self._buf = b""
        while 1:
            # The buffers are bytes, so the separator must be bytes too;
            # searching for a str raises TypeError on Python 3.
            i = chunks[-1].find(b"\n")
            if i >= 0:
                break
            s = self._read()
            if s == b"":
                break
            chunks.append(s)
        if i == -1:
            # loop exited because there is no more data
            return b"".join(chunks)
        else:
            data = b"".join(chunks)
            # XXX could do enough bookkeeping not to do a 2nd search
            i = data.find(b"\n") + 1
            line = data[:i]
            self._buf = data[i:]
            return line

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once `sizehint' is reached."""
        total = 0
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines

    def fileno(self):
        return self._sock.fileno()

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line
1114
class FakeSocket(SharedSocketClient):
    """Socket-like wrapper that sends and receives through an SSL object.

    send()/sendall()/recv() go through the SSL object; every other
    attribute is looked up on the underlying socket.  makefile() returns
    an SSLFile sharing the same reference-counted socket.
    """

    class _closedsocket:
        # Stand-in for the socket after close(): any attribute access
        # fails with a "Bad file descriptor" error.
        def __getattr__(self, name):
            raise error(9, 'Bad file descriptor')

    def __init__(self, sock, ssl):
        sock = SharedSocket(sock)
        SharedSocketClient.__init__(self, sock)
        self._ssl = ssl

    def close(self):
        """Release the shared socket and make this object unusable.

        The class previously defined close() twice, so the later
        definition silently replaced the earlier one and the socket was
        never swapped for the closed-socket stand-in.  This single
        definition performs both steps.
        """
        SharedSocketClient.close(self)
        self._sock = self.__class__._closedsocket()
        self._ssl = None

    def makefile(self, mode, bufsize=None):
        """Return a read-only SSLFile; other modes are not implemented."""
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()
        return SSLFile(self._shared, self._ssl, bufsize)

    def send(self, stuff, flags = 0):
        # `flags' is accepted for socket compatibility but not used.
        return self._ssl.write(stuff)

    sendall = send

    def recv(self, len = 1024, flags = 0):
        # `flags' is accepted for socket compatibility but not used.
        return self._ssl.read(len)

    def __getattr__(self, attr):
        return getattr(self._sock, attr)
Guido van Rossum23acc951994-02-21 16:36:04 +00001149
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None):
        """Set up the connection and remember the key/certificate files.

        `key_file' and `cert_file' are passed to socket.ssl() when the
        connection is opened.
        """
        HTTPConnection.__init__(self, host, port, strict, timeout)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to a host on a given (SSL) port."

        address = (self.host, self.port)
        raw_sock = socket.create_connection(address, self.timeout)
        ssl_obj = socket.ssl(raw_sock, self.key_file, self.cert_file)
        self.sock = FakeSocket(raw_sock, ssl_obj)
Greg Stein5e0fa402000-06-26 08:28:01 +00001167
1168
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        conn = self._connection_class(host, port, strict)
        self._setup(conn)

    def _setup(self, conn):
        """Wrap `conn' and expose part of its interface on this object."""
        self._conn = conn

        # set up delegation to flesh out interface
        for name in ('send', 'putrequest', 'endheaders', 'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # make the wrapped connection speak this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # several values become one header with continuation lines
        combined = '\r\n\t'.join(values)
        self._conn.putheader(header, combined)

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        On a bad status line the tuple is (-1, <the offending line>, None).
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine as e:
            # keep the socket open (as a file), and return it
            # NOTE(review): close() below resets self.file to None again.
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.headers = None
            return -1, e.line, None
        else:
            self.headers = response.msg
            self.file = response.fp
            return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and drop the file reference."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001253
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein81937a42001-08-18 09:20:23 +00001279
Greg Stein5e0fa402000-06-26 08:28:01 +00001280
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
Greg Stein5e0fa402000-06-26 08:28:01 +00001285
class NotConnected(HTTPException):
    """Raised by send() when there is no socket and auto_open is off."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001288
class InvalidURL(HTTPException):
    """HTTP error for an invalid URL."""
1291
class UnknownProtocol(HTTPException):
    """HTTP error carrying the offending protocol version as `version'."""

    def __init__(self, version):
        # args is set by hand so that str() works without calling
        # Exception.__init__
        self.args = (version,)
        self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +00001296
class UnknownTransferEncoding(HTTPException):
    """HTTP error for an unknown transfer encoding."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001299
class UnimplementedFileMode(HTTPException):
    """Raised by FakeSocket.makefile() for modes other than 'r' or 'rb'."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001302
class IncompleteRead(HTTPException):
    """HTTP error carrying the data read so far as `partial'."""

    def __init__(self, partial):
        # args is set by hand so that str() works without calling
        # Exception.__init__
        self.args = (partial,)
        self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +00001307
class ImproperConnectionState(HTTPException):
    """Base class for errors about calls made in the wrong connection state."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001310
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001313
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request has been started."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001316
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001319
class BadStatusLine(HTTPException):
    """HTTP error carrying the offending status line as `line'."""

    def __init__(self, line):
        # args is set by hand so that str() works without calling
        # Exception.__init__
        self.args = (line,)
        self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +00001324
1325# for backwards compatibility
1326error = HTTPException
1327
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets
    the HTTP status line.  For a 0.9 response, however, that line is
    actually the first line of the body, so clients need a readable
    file object that still contains it.  This wrapper serves the saved
    line first and then hands every read over to the underlying file.
    """

    def __init__(self, line, file):
        """Wrap *file*, serving the already-read *line* ahead of it."""
        self._line = line
        self._file = file
        self._line_consumed = False
        self._line_offset = 0
        self._line_left = len(line)
        if not line:
            # Nothing is buffered, so behave exactly like the wrapped
            # file from the start instead of failing on the first read.
            self._done()

    def __getattr__(self, attr):
        # Only reached for names not found on the wrapper itself;
        # everything else is answered by the wrapped file.
        return getattr(self._file, attr)

    def _done(self):
        # Called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object by shadowing them with instance attributes.
        self._line_consumed = True
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Return up to *amt* items, buffered line first.

        A missing or negative *amt* means "everything": the rest of the
        buffered line followed by the rest of the file.
        """
        if self._line_consumed:
            return self._file.read(amt)
        # Invariant from here on: self._line_left > 0.
        if amt is not None and amt < 0:
            # File objects treat a negative size as "read it all";
            # slicing with it would corrupt the line bookkeeping.
            amt = None
        if amt is None or amt > self._line_left:
            rest = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return rest + self._file.read()
            return rest + self._file.read(amt - len(rest))
        # The request fits entirely inside what is left of the line.
        start = self._line_offset
        stop = start + amt
        chunk = self._line[start:stop]
        self._line_offset = stop
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return chunk

    def readline(self):
        """Return the unread remainder of the buffered line, or the
        file's next line once the buffered line is used up.
        """
        if self._line_consumed:
            return self._file.readline()
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        """Return the remainder of the buffered line followed by the
        file's lines; *size* is passed through to the file unchanged.
        """
        if self._line_consumed:
            return self._file.readlines(size)
        lines = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return lines + self._file.readlines()
        return lines + self._file.readlines(size)
Greg Stein5e0fa402000-06-26 08:28:01 +00001395
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: any number of -d flags (each one raises the debug
    level by one), then an optional host and an optional selector.
    Every connection opened here is closed again, even on failure.
    """

    import sys
    import getopt

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = sum(1 for opt, _value in opts if opt == '-d')
    host = 'www.python.org'
    selector = '/'
    if args:
        host = args[0]
    if len(args) > 1:
        selector = args[1]

    def fetch_and_report(conn, host, selector):
        # Issue a GET over conn, print status, body length and headers,
        # and always release the connection afterwards.
        try:
            conn.set_debuglevel(dl)
            conn.connect(host)
            conn.putrequest('GET', selector)
            conn.endheaders()
            status, reason, headers = conn.getreply()
            print('status =', status)
            print('reason =', reason)
            print('read', len(conn.getfile().read()))
            print()
            if headers:
                for header in headers.headers:
                    print(header.strip())
            print()
        finally:
            conn.close()

    fetch_and_report(HTTP(), host, selector)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    try:
        h.putrequest('GET', 'http://www.python.org/~jeremy/')
        h.endheaders()
        h.getreply()
    finally:
        h.close()

    if hasattr(socket, 'ssl'):
        # NOTE(review): assumes HTTPS offers the same close() as HTTP;
        # its definition is outside this section -- confirm.
        for host, selector in (('sourceforge.net', '/projects/python'),):
            print("https://%s%s" % (host, selector))
            fetch_and_report(HTTPS(), host, selector)
Guido van Rossum23acc951994-02-21 16:36:04 +00001456
# Run the ad-hoc tests above when this file is executed as a script.
if __name__ == '__main__':
    test()