blob: 0931446e59d80f3c59630be511dc307df8e87a1a [file] [log] [blame]
Greg Stein5e0fa402000-06-26 08:28:01 +00001"""HTTP/1.1 client library
Guido van Rossum41999c11997-12-09 00:12:23 +00002
Greg Stein5e0fa402000-06-26 08:28:01 +00003<intro stuff goes here>
4<other stuff, too>
Guido van Rossum41999c11997-12-09 00:12:23 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006HTTPConnection goes through a number of "states", which define when a client
Greg Stein5e0fa402000-06-26 08:28:01 +00007may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
Guido van Rossum41999c11997-12-09 00:12:23 +00009
Greg Stein5e0fa402000-06-26 08:28:01 +000010 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |
24 | response = getresponse()
25 v
26 Unread-response [Response-headers-read]
27 |\____________________
Tim Peters5ceadc82001-01-13 19:16:21 +000028 | |
29 | response.read() | putrequest()
30 v v
31 Idle Req-started-unread-response
32 ______/|
33 / |
34 response.read() | | ( putheader() )* endheaders()
35 v v
36 Request-started Req-sent-unread-response
37 |
38 | response.read()
39 v
40 Request-sent
Greg Stein5e0fa402000-06-26 08:28:01 +000041
42This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
58
59Logical State __state __response
60------------- ------- ----------
61Idle _CS_IDLE None
62Request-started _CS_REQ_STARTED None
63Request-sent _CS_REQ_SENT None
64Unread-response _CS_IDLE <response_class>
65Req-started-unread-response _CS_REQ_STARTED <response_class>
66Req-sent-unread-response _CS_REQ_SENT <response_class>
Guido van Rossum41999c11997-12-09 00:12:23 +000067"""
Guido van Rossum23acc951994-02-21 16:36:04 +000068
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000069import errno
Jeremy Hyltonfb35f652007-08-03 20:30:33 +000070import io
Guido van Rossum65ab98c1995-08-07 20:13:02 +000071import mimetools
Jeremy Hylton6459c8d2001-10-11 17:47:22 +000072import socket
Jeremy Hylton8acf1e02002-03-08 19:35:51 +000073from urlparse import urlsplit
Guido van Rossum23acc951994-02-21 16:36:04 +000074
Skip Montanaro951a8842001-06-01 16:25:38 +000075__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
76 "HTTPException", "NotConnected", "UnknownProtocol",
Jeremy Hylton7c75c992002-06-28 23:38:14 +000077 "UnknownTransferEncoding", "UnimplementedFileMode",
78 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
79 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
Georg Brandl6aab16e2006-02-17 19:17:25 +000080 "BadStatusLine", "error", "responses"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000081
Guido van Rossum23acc951994-02-21 16:36:04 +000082HTTP_PORT = 80
Guido van Rossum09c8b6c1999-12-07 21:37:17 +000083HTTPS_PORT = 443
84
Greg Stein5e0fa402000-06-26 08:28:01 +000085_UNKNOWN = 'UNKNOWN'
86
87# connection states
88_CS_IDLE = 'Idle'
89_CS_REQ_STARTED = 'Request-started'
90_CS_REQ_SENT = 'Request-sent'
91
Martin v. Löwis39a31782004-09-18 09:03:49 +000092# status codes
93# informational
94CONTINUE = 100
95SWITCHING_PROTOCOLS = 101
96PROCESSING = 102
97
98# successful
99OK = 200
100CREATED = 201
101ACCEPTED = 202
102NON_AUTHORITATIVE_INFORMATION = 203
103NO_CONTENT = 204
104RESET_CONTENT = 205
105PARTIAL_CONTENT = 206
106MULTI_STATUS = 207
107IM_USED = 226
108
109# redirection
110MULTIPLE_CHOICES = 300
111MOVED_PERMANENTLY = 301
112FOUND = 302
113SEE_OTHER = 303
114NOT_MODIFIED = 304
115USE_PROXY = 305
116TEMPORARY_REDIRECT = 307
117
118# client error
119BAD_REQUEST = 400
120UNAUTHORIZED = 401
121PAYMENT_REQUIRED = 402
122FORBIDDEN = 403
123NOT_FOUND = 404
124METHOD_NOT_ALLOWED = 405
125NOT_ACCEPTABLE = 406
126PROXY_AUTHENTICATION_REQUIRED = 407
127REQUEST_TIMEOUT = 408
128CONFLICT = 409
129GONE = 410
130LENGTH_REQUIRED = 411
131PRECONDITION_FAILED = 412
132REQUEST_ENTITY_TOO_LARGE = 413
133REQUEST_URI_TOO_LONG = 414
134UNSUPPORTED_MEDIA_TYPE = 415
135REQUESTED_RANGE_NOT_SATISFIABLE = 416
136EXPECTATION_FAILED = 417
137UNPROCESSABLE_ENTITY = 422
138LOCKED = 423
139FAILED_DEPENDENCY = 424
140UPGRADE_REQUIRED = 426
141
142# server error
143INTERNAL_SERVER_ERROR = 500
144NOT_IMPLEMENTED = 501
145BAD_GATEWAY = 502
146SERVICE_UNAVAILABLE = 503
147GATEWAY_TIMEOUT = 504
148HTTP_VERSION_NOT_SUPPORTED = 505
149INSUFFICIENT_STORAGE = 507
150NOT_EXTENDED = 510
151
Georg Brandl6aab16e2006-02-17 19:17:25 +0000152# Mapping status codes to official W3C names
153responses = {
154 100: 'Continue',
155 101: 'Switching Protocols',
156
157 200: 'OK',
158 201: 'Created',
159 202: 'Accepted',
160 203: 'Non-Authoritative Information',
161 204: 'No Content',
162 205: 'Reset Content',
163 206: 'Partial Content',
164
165 300: 'Multiple Choices',
166 301: 'Moved Permanently',
167 302: 'Found',
168 303: 'See Other',
169 304: 'Not Modified',
170 305: 'Use Proxy',
171 306: '(Unused)',
172 307: 'Temporary Redirect',
173
174 400: 'Bad Request',
175 401: 'Unauthorized',
176 402: 'Payment Required',
177 403: 'Forbidden',
178 404: 'Not Found',
179 405: 'Method Not Allowed',
180 406: 'Not Acceptable',
181 407: 'Proxy Authentication Required',
182 408: 'Request Timeout',
183 409: 'Conflict',
184 410: 'Gone',
185 411: 'Length Required',
186 412: 'Precondition Failed',
187 413: 'Request Entity Too Large',
188 414: 'Request-URI Too Long',
189 415: 'Unsupported Media Type',
190 416: 'Requested Range Not Satisfiable',
191 417: 'Expectation Failed',
192
193 500: 'Internal Server Error',
194 501: 'Not Implemented',
195 502: 'Bad Gateway',
196 503: 'Service Unavailable',
197 504: 'Gateway Timeout',
198 505: 'HTTP Version Not Supported',
199}
200
Georg Brandl80ba8e82005-09-29 20:16:07 +0000201# maximal amount of data to read at one time in _safe_read
202MAXAMOUNT = 1048576
203
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000204class HTTPMessage(mimetools.Message):
205
206 def addheader(self, key, value):
207 """Add header for field key handling repeats."""
208 prev = self.dict.get(key)
209 if prev is None:
210 self.dict[key] = value
211 else:
212 combined = ", ".join((prev, value))
213 self.dict[key] = combined
214
215 def addcontinue(self, key, more):
216 """Add more field data from a continuation line."""
217 prev = self.dict[key]
218 self.dict[key] = prev + "\n " + more
219
220 def readheaders(self):
221 """Read header lines.
222
223 Read header lines up to the entirely blank line that terminates them.
224 The (normally blank) line that ends the headers is skipped, but not
225 included in the returned list. If a non-header line ends the headers,
226 (which is an error), an attempt is made to backspace over it; it is
227 never included in the returned list.
228
229 The variable self.status is set to the empty string if all went well,
230 otherwise it is an error message. The variable self.headers is a
231 completely uninterpreted list of lines contained in the header (so
232 printing them will reproduce the header exactly as it appears in the
233 file).
234
235 If multiple header fields with the same name occur, they are combined
236 according to the rules in RFC 2616 sec 4.2:
237
238 Appending each subsequent field-value to the first, each separated
239 by a comma. The order in which header fields with the same field-name
240 are received is significant to the interpretation of the combined
241 field value.
242 """
243 # XXX The implementation overrides the readheaders() method of
244 # rfc822.Message. The base class design isn't amenable to
245 # customized behavior here so the method here is a copy of the
246 # base class code with a few small changes.
247
248 self.dict = {}
249 self.unixfrom = ''
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000250 self.headers = hlist = []
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000251 self.status = ''
252 headerseen = ""
253 firstline = 1
254 startofline = unread = tell = None
255 if hasattr(self.fp, 'unread'):
256 unread = self.fp.unread
257 elif self.seekable:
258 tell = self.fp.tell
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000259 while True:
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000260 if tell:
261 try:
262 startofline = tell()
263 except IOError:
264 startofline = tell = None
265 self.seekable = 0
Jeremy Hylton811fc142007-08-03 13:30:02 +0000266 line = str(self.fp.readline(), "iso-8859-1")
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000267 if not line:
268 self.status = 'EOF in headers'
269 break
270 # Skip unix From name time lines
271 if firstline and line.startswith('From '):
272 self.unixfrom = self.unixfrom + line
273 continue
274 firstline = 0
275 if headerseen and line[0] in ' \t':
276 # XXX Not sure if continuation lines are handled properly
277 # for http and/or for repeating headers
278 # It's a continuation line.
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000279 hlist.append(line)
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000280 self.addcontinue(headerseen, line.strip())
281 continue
282 elif self.iscomment(line):
283 # It's a comment. Ignore it.
284 continue
285 elif self.islast(line):
286 # Note! No pushback here! The delimiter line gets eaten.
287 break
288 headerseen = self.isheader(line)
289 if headerseen:
290 # It's a legal header line, save it.
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000291 hlist.append(line)
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000292 self.addheader(headerseen, line[len(headerseen)+1:].strip())
293 continue
294 else:
295 # It's not a header line; throw it back and stop here.
296 if not self.dict:
297 self.status = 'No headers'
298 else:
299 self.status = 'Non-header line where header expected'
300 # Try to undo the read.
301 if unread:
302 unread(line)
303 elif tell:
304 self.fp.seek(startofline)
305 else:
306 self.status = self.status + '; bad seek'
307 break
Greg Stein5e0fa402000-06-26 08:28:01 +0000308
Jeremy Hylton97043c32007-08-04 02:34:24 +0000309class HTTPResponse:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000310
311 # strict: If true, raise BadStatusLine if the status line can't be
312 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
Skip Montanaro186bec22002-07-25 16:10:38 +0000313 # false because it prevents clients from talking to HTTP/0.9
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000314 # servers. Note that a response with a sufficiently corrupted
315 # status line will look like an HTTP/0.9 response.
316
317 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
318
Jeremy Hylton811fc142007-08-03 13:30:02 +0000319 # The bytes from the socket object are iso-8859-1 strings.
320 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
321 # text following RFC 2047. The basic status line parsing only
322 # accepts iso-8859-1.
323
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000324 def __init__(self, sock, debuglevel=0, strict=0, method=None):
Jeremy Hylton39b198d2007-08-04 19:22:00 +0000325 # XXX If the response includes a content-length header, we
326 # need to make sure that the client doesn't read more than the
327 # specified number of bytes. If it does, it will block until
328 # the server times out and closes the connection. (The only
329 # applies to HTTP/1.1 connections.) Since some clients access
330 # self.fp directly rather than calling read(), this is a little
331 # tricky.
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000332 self.fp = sock.makefile("rb", 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000333 self.debuglevel = debuglevel
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000334 self.strict = strict
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000335 self._method = method
Greg Stein5e0fa402000-06-26 08:28:01 +0000336
Greg Steindd6eefb2000-07-18 09:09:48 +0000337 self.msg = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000338
Greg Steindd6eefb2000-07-18 09:09:48 +0000339 # from the Status-Line of the response
Tim Peters07e99cb2001-01-14 23:47:14 +0000340 self.version = _UNKNOWN # HTTP-Version
341 self.status = _UNKNOWN # Status-Code
342 self.reason = _UNKNOWN # Reason-Phrase
Greg Stein5e0fa402000-06-26 08:28:01 +0000343
Tim Peters07e99cb2001-01-14 23:47:14 +0000344 self.chunked = _UNKNOWN # is "chunked" being used?
345 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
346 self.length = _UNKNOWN # number of bytes left in response
347 self.will_close = _UNKNOWN # conn will close at end of response
Greg Stein5e0fa402000-06-26 08:28:01 +0000348
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000349 def _read_status(self):
Jeremy Hylton04319c72007-08-04 03:41:19 +0000350 # Initialize with Simple-Response defaults.
Jeremy Hylton811fc142007-08-03 13:30:02 +0000351 line = str(self.fp.readline(), "iso-8859-1")
Jeremy Hylton30f86742000-09-18 22:50:38 +0000352 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000353 print("reply:", repr(line))
Jeremy Hyltonb6769522003-06-29 17:55:05 +0000354 if not line:
355 # Presumably, the server closed the connection before
356 # sending a valid response.
357 raise BadStatusLine(line)
Greg Steindd6eefb2000-07-18 09:09:48 +0000358 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000359 [version, status, reason] = line.split(None, 2)
Greg Steindd6eefb2000-07-18 09:09:48 +0000360 except ValueError:
361 try:
Guido van Rossum34735a62000-12-15 15:09:42 +0000362 [version, status] = line.split(None, 1)
Greg Steindd6eefb2000-07-18 09:09:48 +0000363 reason = ""
364 except ValueError:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000365 # empty version will cause next test to fail and status
366 # will be treated as 0.9 response.
367 version = ""
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000368 if not version.startswith("HTTP/"):
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000369 if self.strict:
370 self.close()
371 raise BadStatusLine(line)
372 else:
Jeremy Hylton04319c72007-08-04 03:41:19 +0000373 # Assume it's a Simple-Response from an 0.9 server.
374 # We have to convert the first line back to raw bytes
375 # because self.fp.readline() needs to return bytes.
376 self.fp = LineAndFileWrapper(bytes(line), self.fp)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000377 return "HTTP/0.9", 200, ""
Greg Stein5e0fa402000-06-26 08:28:01 +0000378
Jeremy Hylton23d40472001-04-13 14:57:08 +0000379 # The status code is a three-digit number
380 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000381 status = int(status)
Jeremy Hylton23d40472001-04-13 14:57:08 +0000382 if status < 100 or status > 999:
383 raise BadStatusLine(line)
384 except ValueError:
385 raise BadStatusLine(line)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000386 return version, status, reason
Greg Stein5e0fa402000-06-26 08:28:01 +0000387
Jeremy Hylton39c03802002-07-12 14:04:09 +0000388 def begin(self):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000389 if self.msg is not None:
390 # we've already started reading the response
391 return
392
393 # read until we get a non-100 response
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000394 while True:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000395 version, status, reason = self._read_status()
Martin v. Löwis39a31782004-09-18 09:03:49 +0000396 if status != CONTINUE:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000397 break
398 # skip the header from the 100 response
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000399 while True:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000400 skip = self.fp.readline().strip()
401 if not skip:
402 break
403 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000404 print("header:", skip)
Tim Petersc411dba2002-07-16 21:35:23 +0000405
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000406 self.status = status
407 self.reason = reason.strip()
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000408 if version == "HTTP/1.0":
Greg Steindd6eefb2000-07-18 09:09:48 +0000409 self.version = 10
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000410 elif version.startswith("HTTP/1."):
Tim Peters07e99cb2001-01-14 23:47:14 +0000411 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000412 elif version == "HTTP/0.9":
Jeremy Hylton110941a2000-10-12 19:58:36 +0000413 self.version = 9
Greg Steindd6eefb2000-07-18 09:09:48 +0000414 else:
415 raise UnknownProtocol(version)
Greg Stein5e0fa402000-06-26 08:28:01 +0000416
Jeremy Hylton110941a2000-10-12 19:58:36 +0000417 if self.version == 9:
Georg Brandl0aade9a2005-06-26 22:06:54 +0000418 self.length = None
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000419 self.chunked = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000420 self.will_close = 1
Jeremy Hylton4e7855d2007-08-04 03:34:03 +0000421 self.msg = HTTPMessage(io.BytesIO())
Jeremy Hylton110941a2000-10-12 19:58:36 +0000422 return
423
Jeremy Hylton6d0a4c72002-07-07 16:51:37 +0000424 self.msg = HTTPMessage(self.fp, 0)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000425 if self.debuglevel > 0:
426 for hdr in self.msg.headers:
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000427 print("header:", hdr, end=" ")
Greg Stein5e0fa402000-06-26 08:28:01 +0000428
Greg Steindd6eefb2000-07-18 09:09:48 +0000429 # don't let the msg keep an fp
430 self.msg.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000431
Greg Steindd6eefb2000-07-18 09:09:48 +0000432 # are we using the chunked-style of transfer encoding?
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000433 tr_enc = self.msg.getheader("transfer-encoding")
Jeremy Hyltond229b3a2002-09-03 19:24:24 +0000434 if tr_enc and tr_enc.lower() == "chunked":
Greg Steindd6eefb2000-07-18 09:09:48 +0000435 self.chunked = 1
436 self.chunk_left = None
437 else:
438 self.chunked = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000439
Greg Steindd6eefb2000-07-18 09:09:48 +0000440 # will the connection close at the end of the response?
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000441 self.will_close = self._check_close()
Greg Stein5e0fa402000-06-26 08:28:01 +0000442
Greg Steindd6eefb2000-07-18 09:09:48 +0000443 # do we have a Content-Length?
444 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +0000445 self.length = None
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000446 length = self.msg.getheader("content-length")
Greg Steindd6eefb2000-07-18 09:09:48 +0000447 if length and not self.chunked:
Jeremy Hylton30a81812000-09-14 20:34:27 +0000448 try:
449 self.length = int(length)
450 except ValueError:
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +0000451 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000452
Greg Steindd6eefb2000-07-18 09:09:48 +0000453 # does the body have a fixed length? (of zero)
Martin v. Löwis39a31782004-09-18 09:03:49 +0000454 if (status == NO_CONTENT or status == NOT_MODIFIED or
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000455 100 <= status < 200 or # 1xx codes
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000456 self._method == "HEAD"):
Greg Steindd6eefb2000-07-18 09:09:48 +0000457 self.length = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000458
Greg Steindd6eefb2000-07-18 09:09:48 +0000459 # if the connection remains open, and we aren't using chunked, and
460 # a content-length was not provided, then assume that the connection
461 # WILL close.
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +0000462 if (not self.will_close and
463 not self.chunked and
464 self.length is None):
Greg Steindd6eefb2000-07-18 09:09:48 +0000465 self.will_close = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000466
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000467 def _check_close(self):
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000468 conn = self.msg.getheader("connection")
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000469 if self.version == 11:
470 # An HTTP/1.1 proxy is assumed to stay open unless
471 # explicitly closed.
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000472 conn = self.msg.getheader("connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000473 if conn and "close" in conn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000474 return True
475 return False
476
Jeremy Hylton2c178252004-08-07 16:28:14 +0000477 # Some HTTP/1.0 implementations have support for persistent
478 # connections, using rules different than HTTP/1.1.
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000479
480 # For older HTTP, Keep-Alive indiciates persistent connection.
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000481 if self.msg.getheader("keep-alive"):
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000482 return False
Tim Peters77c06fb2002-11-24 02:35:35 +0000483
Jeremy Hylton2c178252004-08-07 16:28:14 +0000484 # At least Akamai returns a "Connection: Keep-Alive" header,
485 # which was supposed to be sent by the client.
486 if conn and "keep-alive" in conn.lower():
487 return False
488
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000489 # Proxy-Connection is a netscape hack.
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000490 pconn = self.msg.getheader("proxy-connection")
Raymond Hettingerbac788a2004-05-04 09:21:43 +0000491 if pconn and "keep-alive" in pconn.lower():
Jeremy Hylton22b3a492002-11-13 17:27:43 +0000492 return False
493
494 # otherwise, assume it will close
495 return True
496
Greg Steindd6eefb2000-07-18 09:09:48 +0000497 def close(self):
498 if self.fp:
499 self.fp.close()
500 self.fp = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000501
Greg Steindd6eefb2000-07-18 09:09:48 +0000502 def isclosed(self):
503 # NOTE: it is possible that we will not ever call self.close(). This
504 # case occurs when will_close is TRUE, length is None, and we
505 # read up to the last byte, but NOT past it.
506 #
507 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
508 # called, meaning self.isclosed() is meaningful.
509 return self.fp is None
510
Jeremy Hylton2c178252004-08-07 16:28:14 +0000511 # XXX It would be nice to have readline and __iter__ for this, too.
512
Greg Steindd6eefb2000-07-18 09:09:48 +0000513 def read(self, amt=None):
514 if self.fp is None:
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000515 return ""
Greg Steindd6eefb2000-07-18 09:09:48 +0000516
517 if self.chunked:
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000518 return self._read_chunked(amt)
Tim Peters230a60c2002-11-09 05:08:07 +0000519
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000520 if amt is None:
Greg Steindd6eefb2000-07-18 09:09:48 +0000521 # unbounded read
Jeremy Hyltondef9d2a2004-11-07 16:13:49 +0000522 if self.length is None:
Greg Steindd6eefb2000-07-18 09:09:48 +0000523 s = self.fp.read()
524 else:
525 s = self._safe_read(self.length)
Jeremy Hyltondef9d2a2004-11-07 16:13:49 +0000526 self.length = 0
Tim Peters07e99cb2001-01-14 23:47:14 +0000527 self.close() # we read everything
Greg Steindd6eefb2000-07-18 09:09:48 +0000528 return s
529
530 if self.length is not None:
531 if amt > self.length:
532 # clip the read to the "end of response"
533 amt = self.length
Greg Steindd6eefb2000-07-18 09:09:48 +0000534
535 # we do not use _safe_read() here because this may be a .will_close
536 # connection, and the user is reading more bytes than will be provided
537 # (for example, reading in 1k chunks)
538 s = self.fp.read(amt)
Jeremy Hyltondef9d2a2004-11-07 16:13:49 +0000539 if self.length is not None:
540 self.length -= len(s)
Greg Steindd6eefb2000-07-18 09:09:48 +0000541
Greg Steindd6eefb2000-07-18 09:09:48 +0000542 return s
543
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000544 def _read_chunked(self, amt):
545 assert self.chunked != _UNKNOWN
546 chunk_left = self.chunk_left
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000547 value = ""
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000548
549 # XXX This accumulates chunks by repeated string concatenation,
550 # which is not efficient as the number or size of chunks gets big.
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000551 while True:
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000552 if chunk_left is None:
553 line = self.fp.readline()
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000554 i = line.find(";")
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000555 if i >= 0:
556 line = line[:i] # strip chunk-extensions
557 chunk_left = int(line, 16)
558 if chunk_left == 0:
559 break
560 if amt is None:
561 value += self._safe_read(chunk_left)
562 elif amt < chunk_left:
563 value += self._safe_read(amt)
564 self.chunk_left = chunk_left - amt
565 return value
566 elif amt == chunk_left:
567 value += self._safe_read(amt)
568 self._safe_read(2) # toss the CRLF at the end of the chunk
569 self.chunk_left = None
570 return value
571 else:
572 value += self._safe_read(chunk_left)
573 amt -= chunk_left
574
575 # we read the whole chunk, get another
576 self._safe_read(2) # toss the CRLF at the end of the chunk
577 chunk_left = None
578
579 # read and discard trailer up to the CRLF terminator
580 ### note: we shouldn't have any trailers!
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000581 while True:
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000582 line = self.fp.readline()
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000583 if line == "\r\n":
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000584 break
585
586 # we read everything; close the "file"
Jeremy Hyltond4c472c2002-09-03 20:49:06 +0000587 self.close()
588
589 return value
Tim Peters230a60c2002-11-09 05:08:07 +0000590
Greg Steindd6eefb2000-07-18 09:09:48 +0000591 def _safe_read(self, amt):
592 """Read the number of bytes requested, compensating for partial reads.
593
594 Normally, we have a blocking socket, but a read() can be interrupted
595 by a signal (resulting in a partial read).
596
597 Note that we cannot distinguish between EOF and an interrupt when zero
598 bytes have been read. IncompleteRead() will be raised in this
599 situation.
600
601 This function should be used when <amt> bytes "should" be present for
602 reading. If the bytes are truly not available (due to EOF), then the
603 IncompleteRead exception can be used to detect the problem.
604 """
Georg Brandl80ba8e82005-09-29 20:16:07 +0000605 s = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000606 while amt > 0:
Georg Brandl80ba8e82005-09-29 20:16:07 +0000607 chunk = self.fp.read(min(amt, MAXAMOUNT))
Greg Steindd6eefb2000-07-18 09:09:48 +0000608 if not chunk:
609 raise IncompleteRead(s)
Georg Brandl80ba8e82005-09-29 20:16:07 +0000610 s.append(chunk)
Raymond Hettingerb2e0b922003-02-26 22:45:18 +0000611 amt -= len(chunk)
Jeremy Hyltone5d0e842007-08-03 13:45:24 +0000612 return "".join(s)
Greg Steindd6eefb2000-07-18 09:09:48 +0000613
614 def getheader(self, name, default=None):
615 if self.msg is None:
616 raise ResponseNotReady()
617 return self.msg.getheader(name, default)
Greg Stein5e0fa402000-06-26 08:28:01 +0000618
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000619 def getheaders(self):
620 """Return list of (header, value) tuples."""
621 if self.msg is None:
622 raise ResponseNotReady()
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000623 return list(self.msg.items())
Martin v. Löwisdeacce22004-08-18 12:46:26 +0000624
Greg Stein5e0fa402000-06-26 08:28:01 +0000625
626class HTTPConnection:
627
Greg Steindd6eefb2000-07-18 09:09:48 +0000628 _http_vsn = 11
629 _http_vsn_str = 'HTTP/1.1'
Greg Stein5e0fa402000-06-26 08:28:01 +0000630
Greg Steindd6eefb2000-07-18 09:09:48 +0000631 response_class = HTTPResponse
632 default_port = HTTP_PORT
633 auto_open = 1
Jeremy Hylton30f86742000-09-18 22:50:38 +0000634 debuglevel = 0
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000635 strict = 0
Greg Stein5e0fa402000-06-26 08:28:01 +0000636
Guido van Rossumd8faa362007-04-27 19:54:29 +0000637 def __init__(self, host, port=None, strict=None, timeout=None):
638 self.timeout = timeout
Greg Steindd6eefb2000-07-18 09:09:48 +0000639 self.sock = None
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000640 self._buffer = []
Greg Steindd6eefb2000-07-18 09:09:48 +0000641 self.__response = None
642 self.__state = _CS_IDLE
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000643 self._method = None
Tim Petersc411dba2002-07-16 21:35:23 +0000644
Greg Steindd6eefb2000-07-18 09:09:48 +0000645 self._set_hostport(host, port)
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000646 if strict is not None:
647 self.strict = strict
Greg Stein5e0fa402000-06-26 08:28:01 +0000648
Greg Steindd6eefb2000-07-18 09:09:48 +0000649 def _set_hostport(self, host, port):
650 if port is None:
Skip Montanaro10e6e0e2004-09-14 16:32:02 +0000651 i = host.rfind(':')
Skip Montanarocae14d22004-09-14 17:55:21 +0000652 j = host.rfind(']') # ipv6 addresses have [...]
653 if i > j:
Skip Montanaro9d389972002-03-24 16:53:50 +0000654 try:
655 port = int(host[i+1:])
656 except ValueError:
Jeremy Hyltonfbd79942002-07-02 20:19:08 +0000657 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
Greg Steindd6eefb2000-07-18 09:09:48 +0000658 host = host[:i]
659 else:
660 port = self.default_port
Raymond Hettinger4d037912004-10-14 15:23:38 +0000661 if host and host[0] == '[' and host[-1] == ']':
Brett Cannon0a1af4a2004-09-15 23:26:23 +0000662 host = host[1:-1]
Greg Steindd6eefb2000-07-18 09:09:48 +0000663 self.host = host
664 self.port = port
Greg Stein5e0fa402000-06-26 08:28:01 +0000665
Jeremy Hylton30f86742000-09-18 22:50:38 +0000666 def set_debuglevel(self, level):
667 self.debuglevel = level
668
Greg Steindd6eefb2000-07-18 09:09:48 +0000669 def connect(self):
670 """Connect to the host and port specified in __init__."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000671 self.sock = socket.create_connection((self.host,self.port),
672 self.timeout)
Greg Stein5e0fa402000-06-26 08:28:01 +0000673
Greg Steindd6eefb2000-07-18 09:09:48 +0000674 def close(self):
675 """Close the connection to the HTTP server."""
676 if self.sock:
Tim Peters07e99cb2001-01-14 23:47:14 +0000677 self.sock.close() # close it manually... there may be other refs
Greg Steindd6eefb2000-07-18 09:09:48 +0000678 self.sock = None
679 if self.__response:
680 self.__response.close()
681 self.__response = None
682 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000683
Greg Steindd6eefb2000-07-18 09:09:48 +0000684 def send(self, str):
685 """Send `str' to the server."""
686 if self.sock is None:
687 if self.auto_open:
688 self.connect()
689 else:
690 raise NotConnected()
Greg Stein5e0fa402000-06-26 08:28:01 +0000691
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000692 # send the data to the server. if we get a broken pipe, then close
Greg Steindd6eefb2000-07-18 09:09:48 +0000693 # the socket. we want to reconnect when somebody tries to send again.
694 #
695 # NOTE: we DO propagate the error, though, because we cannot simply
696 # ignore the error... the caller will know if they can retry.
Jeremy Hylton30f86742000-09-18 22:50:38 +0000697 if self.debuglevel > 0:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000698 print("send:", repr(str))
Greg Steindd6eefb2000-07-18 09:09:48 +0000699 try:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000700 blocksize=8192
701 if hasattr(str,'read') :
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000702 if self.debuglevel > 0: print("sendIng a read()able")
Thomas Wouters89f507f2006-12-13 04:49:30 +0000703 data=str.read(blocksize)
704 while data:
705 self.sock.sendall(data)
706 data=str.read(blocksize)
707 else:
708 self.sock.sendall(str)
Guido van Rossumb940e112007-01-10 16:19:56 +0000709 except socket.error as v:
Guido van Rossum89df2452007-03-19 22:26:27 +0000710 if v.args[0] == 32: # Broken pipe
Greg Steindd6eefb2000-07-18 09:09:48 +0000711 self.close()
712 raise
Greg Stein5e0fa402000-06-26 08:28:01 +0000713
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000714 def _output(self, s):
715 """Add a line of output to the current request buffer.
Tim Peters469cdad2002-08-08 20:19:19 +0000716
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000717 Assumes that the line does *not* end with \\r\\n.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000718 """
719 self._buffer.append(s)
720
721 def _send_output(self):
722 """Send the currently buffered request and clear the buffer.
723
Jeremy Hyltone3252ec2002-07-16 21:41:43 +0000724 Appends an extra \\r\\n to the buffer.
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000725 """
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000726 self._buffer.extend((b"", b""))
727 msg = b"\r\n".join(self._buffer)
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000728 del self._buffer[:]
729 self.send(msg)
730
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000731 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
Greg Steindd6eefb2000-07-18 09:09:48 +0000732 """Send a request to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000733
Greg Steindd6eefb2000-07-18 09:09:48 +0000734 `method' specifies an HTTP request method, e.g. 'GET'.
735 `url' specifies the object being requested, e.g. '/index.html'.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000736 `skip_host' if True does not add automatically a 'Host:' header
737 `skip_accept_encoding' if True does not add automatically an
738 'Accept-Encoding:' header
Greg Steindd6eefb2000-07-18 09:09:48 +0000739 """
Greg Stein5e0fa402000-06-26 08:28:01 +0000740
Greg Stein616a58d2003-06-24 06:35:19 +0000741 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +0000742 if self.__response and self.__response.isclosed():
743 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000744
Tim Peters58eb11c2004-01-18 20:29:55 +0000745
Greg Steindd6eefb2000-07-18 09:09:48 +0000746 # in certain cases, we cannot issue another request on this connection.
747 # this occurs when:
748 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
749 # 2) a response to a previous request has signalled that it is going
750 # to close the connection upon completion.
751 # 3) the headers for the previous response have not been read, thus
752 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
753 #
754 # if there is no prior response, then we can request at will.
755 #
756 # if point (2) is true, then we will have passed the socket to the
757 # response (effectively meaning, "there is no prior response"), and
758 # will open a new one when a new request is made.
759 #
760 # Note: if a prior response exists, then we *can* start a new request.
761 # We are not allowed to begin fetching the response to this new
762 # request, however, until that prior response is complete.
763 #
764 if self.__state == _CS_IDLE:
765 self.__state = _CS_REQ_STARTED
766 else:
767 raise CannotSendRequest()
Greg Stein5e0fa402000-06-26 08:28:01 +0000768
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000769 # Save the method we use, we need it later in the response phase
770 self._method = method
Greg Steindd6eefb2000-07-18 09:09:48 +0000771 if not url:
772 url = '/'
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000773 request = '%s %s %s' % (method, url, self._http_vsn_str)
Greg Stein5e0fa402000-06-26 08:28:01 +0000774
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000775 # Non-ASCII characters should have been eliminated earlier
776 self._output(request.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000777
Greg Steindd6eefb2000-07-18 09:09:48 +0000778 if self._http_vsn == 11:
779 # Issue some standard headers for better HTTP/1.1 compliance
Greg Stein5e0fa402000-06-26 08:28:01 +0000780
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000781 if not skip_host:
782 # this header is issued *only* for HTTP/1.1
783 # connections. more specifically, this means it is
784 # only issued when the client uses the new
785 # HTTPConnection() class. backwards-compat clients
786 # will be using HTTP/1.0 and those clients may be
787 # issuing this header themselves. we should NOT issue
788 # it twice; some web servers (such as Apache) barf
789 # when they see two Host: headers
Guido van Rossumf6922aa2001-01-14 21:03:01 +0000790
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000791 # If we need a non-standard port,include it in the
792 # header. If the request is going through a proxy,
793 # but the host of the actual URL, not the host of the
794 # proxy.
Jeremy Hylton8acf1e02002-03-08 19:35:51 +0000795
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000796 netloc = ''
797 if url.startswith('http'):
798 nil, netloc, nil, nil, nil = urlsplit(url)
799
800 if netloc:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000801 try:
802 netloc_enc = netloc.encode("ascii")
803 except UnicodeEncodeError:
804 netloc_enc = netloc.encode("idna")
805 self.putheader('Host', netloc_enc)
Jeremy Hylton3921ff62002-03-09 06:07:23 +0000806 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000807 try:
808 host_enc = self.host.encode("ascii")
809 except UnicodeEncodeError:
810 host_enc = self.host.encode("idna")
811 if self.port == HTTP_PORT:
812 self.putheader('Host', host_enc)
813 else:
814 self.putheader('Host', "%s:%s" % (host_enc, self.port))
Greg Stein5e0fa402000-06-26 08:28:01 +0000815
Greg Steindd6eefb2000-07-18 09:09:48 +0000816 # note: we are assuming that clients will not attempt to set these
817 # headers since *this* library must deal with the
818 # consequences. this also means that when the supporting
819 # libraries are updated to recognize other forms, then this
820 # code should be changed (removed or updated).
Greg Stein5e0fa402000-06-26 08:28:01 +0000821
Greg Steindd6eefb2000-07-18 09:09:48 +0000822 # we only want a Content-Encoding of "identity" since we don't
823 # support encodings such as x-gzip or x-deflate.
Martin v. Löwisaf7dc8d2003-11-19 19:51:55 +0000824 if not skip_accept_encoding:
825 self.putheader('Accept-Encoding', 'identity')
Greg Stein5e0fa402000-06-26 08:28:01 +0000826
Greg Steindd6eefb2000-07-18 09:09:48 +0000827 # we can accept "chunked" Transfer-Encodings, but no others
828 # NOTE: no TE header implies *only* "chunked"
829 #self.putheader('TE', 'chunked')
Greg Stein5e0fa402000-06-26 08:28:01 +0000830
Greg Steindd6eefb2000-07-18 09:09:48 +0000831 # if TE is supplied in the header, then it must appear in a
832 # Connection header.
833 #self.putheader('Connection', 'TE')
Greg Stein5e0fa402000-06-26 08:28:01 +0000834
Greg Steindd6eefb2000-07-18 09:09:48 +0000835 else:
836 # For HTTP/1.0, the server will assume "not chunked"
837 pass
Greg Stein5e0fa402000-06-26 08:28:01 +0000838
Greg Steindd6eefb2000-07-18 09:09:48 +0000839 def putheader(self, header, value):
840 """Send a request header line to the server.
Greg Stein5e0fa402000-06-26 08:28:01 +0000841
Greg Steindd6eefb2000-07-18 09:09:48 +0000842 For example: h.putheader('Accept', 'text/html')
843 """
844 if self.__state != _CS_REQ_STARTED:
845 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000846
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000847 header = '%s: %s' % (header, value)
848 self._output(header.encode('ascii'))
Greg Stein5e0fa402000-06-26 08:28:01 +0000849
Greg Steindd6eefb2000-07-18 09:09:48 +0000850 def endheaders(self):
851 """Indicate that the last header line has been sent to the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000852
Greg Steindd6eefb2000-07-18 09:09:48 +0000853 if self.__state == _CS_REQ_STARTED:
854 self.__state = _CS_REQ_SENT
855 else:
856 raise CannotSendHeader()
Greg Stein5e0fa402000-06-26 08:28:01 +0000857
Jeremy Hylton8531b1b2002-07-16 21:21:11 +0000858 self._send_output()
Greg Stein5e0fa402000-06-26 08:28:01 +0000859
Greg Steindd6eefb2000-07-18 09:09:48 +0000860 def request(self, method, url, body=None, headers={}):
861 """Send a complete request to the server."""
Greg Steindd6eefb2000-07-18 09:09:48 +0000862 try:
863 self._send_request(method, url, body, headers)
Guido van Rossumb940e112007-01-10 16:19:56 +0000864 except socket.error as v:
Greg Steindd6eefb2000-07-18 09:09:48 +0000865 # trap 'Broken pipe' if we're allowed to automatically reconnect
Guido van Rossum89df2452007-03-19 22:26:27 +0000866 if v.args[0] != 32 or not self.auto_open:
Greg Steindd6eefb2000-07-18 09:09:48 +0000867 raise
868 # try one more time
869 self._send_request(method, url, body, headers)
Greg Stein5e0fa402000-06-26 08:28:01 +0000870
Greg Steindd6eefb2000-07-18 09:09:48 +0000871 def _send_request(self, method, url, body, headers):
Jeremy Hylton2c178252004-08-07 16:28:14 +0000872 # honour explicitly requested Host: and Accept-Encoding headers
873 header_names = dict.fromkeys([k.lower() for k in headers])
874 skips = {}
875 if 'host' in header_names:
876 skips['skip_host'] = 1
877 if 'accept-encoding' in header_names:
878 skips['skip_accept_encoding'] = 1
Greg Stein5e0fa402000-06-26 08:28:01 +0000879
Jeremy Hylton2c178252004-08-07 16:28:14 +0000880 self.putrequest(method, url, **skips)
881
882 if body and ('content-length' not in header_names):
Thomas Wouters89f507f2006-12-13 04:49:30 +0000883 thelen=None
884 try:
885 thelen=str(len(body))
Guido van Rossumb940e112007-01-10 16:19:56 +0000886 except TypeError as te:
Thomas Wouters89f507f2006-12-13 04:49:30 +0000887 # If this is a file-like object, try to
888 # fstat its file descriptor
889 import os
890 try:
891 thelen = str(os.fstat(body.fileno()).st_size)
892 except (AttributeError, OSError):
893 # Don't send a length if this failed
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000894 if self.debuglevel > 0: print("Cannot stat!!")
Thomas Wouters9fe394c2007-02-05 01:24:16 +0000895
Thomas Wouters89f507f2006-12-13 04:49:30 +0000896 if thelen is not None:
897 self.putheader('Content-Length',thelen)
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000898 for hdr, value in headers.items():
Greg Steindd6eefb2000-07-18 09:09:48 +0000899 self.putheader(hdr, value)
900 self.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +0000901
Greg Steindd6eefb2000-07-18 09:09:48 +0000902 if body:
Martin v. Löwisdd5a8602007-06-30 09:22:09 +0000903 if isinstance(body, str): body = body.encode('ascii')
Greg Steindd6eefb2000-07-18 09:09:48 +0000904 self.send(body)
Greg Stein5e0fa402000-06-26 08:28:01 +0000905
Greg Steindd6eefb2000-07-18 09:09:48 +0000906 def getresponse(self):
Jeremy Hyltonfb35f652007-08-03 20:30:33 +0000907 """Get the response from the server."""
Greg Stein5e0fa402000-06-26 08:28:01 +0000908
Greg Stein616a58d2003-06-24 06:35:19 +0000909 # if a prior response has been completed, then forget about it.
Greg Steindd6eefb2000-07-18 09:09:48 +0000910 if self.__response and self.__response.isclosed():
911 self.__response = None
Greg Stein5e0fa402000-06-26 08:28:01 +0000912
Greg Steindd6eefb2000-07-18 09:09:48 +0000913 #
914 # if a prior response exists, then it must be completed (otherwise, we
915 # cannot read this response's header to determine the connection-close
916 # behavior)
917 #
918 # note: if a prior response existed, but was connection-close, then the
919 # socket and response were made independent of this HTTPConnection
920 # object since a new request requires that we open a whole new
921 # connection
922 #
923 # this means the prior response had one of two states:
924 # 1) will_close: this connection was reset and the prior socket and
925 # response operate independently
926 # 2) persistent: the response was retained and we await its
927 # isclosed() status to become true.
928 #
929 if self.__state != _CS_REQ_SENT or self.__response:
930 raise ResponseNotReady()
Greg Stein5e0fa402000-06-26 08:28:01 +0000931
Jeremy Hylton30f86742000-09-18 22:50:38 +0000932 if self.debuglevel > 0:
Jeremy Hyltond46aa372002-07-06 18:48:07 +0000933 response = self.response_class(self.sock, self.debuglevel,
Tim Petersc2659cf2003-05-12 20:19:37 +0000934 strict=self.strict,
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000935 method=self._method)
Jeremy Hylton30f86742000-09-18 22:50:38 +0000936 else:
Jeremy Hyltonc1b2cb92003-05-05 16:13:58 +0000937 response = self.response_class(self.sock, strict=self.strict,
938 method=self._method)
Greg Stein5e0fa402000-06-26 08:28:01 +0000939
Jeremy Hylton39c03802002-07-12 14:04:09 +0000940 response.begin()
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +0000941 assert response.will_close != _UNKNOWN
Greg Steindd6eefb2000-07-18 09:09:48 +0000942 self.__state = _CS_IDLE
Greg Stein5e0fa402000-06-26 08:28:01 +0000943
Greg Steindd6eefb2000-07-18 09:09:48 +0000944 if response.will_close:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000945 # this effectively passes the connection to the response
946 self.close()
Greg Steindd6eefb2000-07-18 09:09:48 +0000947 else:
948 # remember this, so we can tell when it is complete
949 self.__response = response
Greg Stein5e0fa402000-06-26 08:28:01 +0000950
Greg Steindd6eefb2000-07-18 09:09:48 +0000951 return response
Greg Stein5e0fa402000-06-26 08:28:01 +0000952
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000953# The next several classes are used to define FakeSocket, a socket-like
Jeremy Hylton29d27ac2002-07-09 21:22:36 +0000954# interface to an SSL connection.
955
956# The primary complexity comes from faking a makefile() method. The
957# standard socket makefile() implementation calls dup() on the socket
958# file descriptor. As a consequence, clients can call close() on the
959# parent socket and its makefile children in any order. The underlying
960# socket isn't closed until they are all closed.
961
962# The implementation uses reference counting to keep the socket open
963# until the last client calls close(). SharedSocket keeps track of
964# the reference counting and SharedSocketClient provides an constructor
965# and close() method that call incref() and decref() correctly.
966
967class SharedSocket:
968
969 def __init__(self, sock):
970 self.sock = sock
971 self._refcnt = 0
972
973 def incref(self):
974 self._refcnt += 1
975
976 def decref(self):
977 self._refcnt -= 1
978 assert self._refcnt >= 0
979 if self._refcnt == 0:
980 self.sock.close()
981
982 def __del__(self):
983 self.sock.close()
984
985class SharedSocketClient:
986
987 def __init__(self, shared):
988 self._closed = 0
989 self._shared = shared
990 self._shared.incref()
991 self._sock = shared.sock
992
993 def close(self):
994 if not self._closed:
995 self._shared.decref()
996 self._closed = 1
997 self._shared = None
998
999class SSLFile(SharedSocketClient):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001000 """File-like object wrapping an SSL socket."""
Greg Stein5e0fa402000-06-26 08:28:01 +00001001
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001002 BUFSIZE = 8192
Tim Petersc411dba2002-07-16 21:35:23 +00001003
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001004 def __init__(self, sock, ssl, bufsize=None):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001005 SharedSocketClient.__init__(self, sock)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001006 self._ssl = ssl
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001007 self._buf = b""
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001008 self._bufsize = bufsize or self.__class__.BUFSIZE
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001009
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001010 def _read(self):
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001011 buf = b""
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001012 # put in a loop so that we retry on transient errors
Raymond Hettingerb2e0b922003-02-26 22:45:18 +00001013 while True:
Greg Steindd6eefb2000-07-18 09:09:48 +00001014 try:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001015 buf = self._ssl.read(self._bufsize)
Guido van Rossumb940e112007-01-10 16:19:56 +00001016 except socket.sslerror as err:
Guido van Rossum89df2452007-03-19 22:26:27 +00001017 err_type = err.args[0]
Brett Cannon6f8fe152007-02-27 20:16:38 +00001018 if (err_type == socket.SSL_ERROR_WANT_READ
1019 or err_type == socket.SSL_ERROR_WANT_WRITE):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +00001020 continue
Brett Cannon6f8fe152007-02-27 20:16:38 +00001021 if (err_type == socket.SSL_ERROR_ZERO_RETURN
1022 or err_type == socket.SSL_ERROR_EOF):
Jeremy Hylton6459c8d2001-10-11 17:47:22 +00001023 break
1024 raise
Guido van Rossumb940e112007-01-10 16:19:56 +00001025 except socket.error as err:
Guido van Rossum89df2452007-03-19 22:26:27 +00001026 err_type = err.args[0]
Brett Cannon6f8fe152007-02-27 20:16:38 +00001027 if err_type == errno.EINTR:
Jeremy Hylton6459c8d2001-10-11 17:47:22 +00001028 continue
Brett Cannon6f8fe152007-02-27 20:16:38 +00001029 if err_type == errno.EBADF:
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001030 # XXX socket was closed?
1031 break
Jeremy Hylton6459c8d2001-10-11 17:47:22 +00001032 raise
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001033 else:
Jeremy Hylton42dd01a2001-02-01 23:35:20 +00001034 break
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001035 return buf
1036
1037 def read(self, size=None):
1038 L = [self._buf]
Raymond Hettinger49227682003-03-06 16:31:48 +00001039 avail = len(self._buf)
1040 while size is None or avail < size:
1041 s = self._read()
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001042 if s == b"":
Raymond Hettinger49227682003-03-06 16:31:48 +00001043 break
1044 L.append(s)
1045 avail += len(s)
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001046 all = b"".join(L)
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001047 if size is None:
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001048 self._buf = b""
Raymond Hettinger49227682003-03-06 16:31:48 +00001049 return all
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001050 else:
Raymond Hettinger49227682003-03-06 16:31:48 +00001051 self._buf = all[size:]
1052 return all[:size]
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001053
1054 def readline(self):
1055 L = [self._buf]
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001056 self._buf = b""
Raymond Hettinger49227682003-03-06 16:31:48 +00001057 while 1:
1058 i = L[-1].find("\n")
1059 if i >= 0:
Raymond Hettingerb2e0b922003-02-26 22:45:18 +00001060 break
Raymond Hettinger49227682003-03-06 16:31:48 +00001061 s = self._read()
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001062 if s == b"":
Raymond Hettinger49227682003-03-06 16:31:48 +00001063 break
1064 L.append(s)
1065 if i == -1:
1066 # loop exited because there is no more data
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001067 return b"".join(L)
Raymond Hettinger49227682003-03-06 16:31:48 +00001068 else:
Jeremy Hylton0ee5eeb2007-08-04 03:25:17 +00001069 all = b"".join(L)
Raymond Hettinger49227682003-03-06 16:31:48 +00001070 # XXX could do enough bookkeeping not to do a 2nd search
1071 i = all.find("\n") + 1
1072 line = all[:i]
1073 self._buf = all[i:]
1074 return line
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001075
Martin v. Löwis11892ec2003-10-27 14:07:53 +00001076 def readlines(self, sizehint=0):
1077 total = 0
1078 list = []
1079 while True:
1080 line = self.readline()
1081 if not line:
1082 break
1083 list.append(line)
1084 total += len(line)
1085 if sizehint and total >= sizehint:
1086 break
1087 return list
1088
1089 def fileno(self):
1090 return self._sock.fileno()
1091
1092 def __iter__(self):
1093 return self
1094
Georg Brandla18af4e2007-04-21 15:47:16 +00001095 def __next__(self):
Martin v. Löwis11892ec2003-10-27 14:07:53 +00001096 line = self.readline()
1097 if not line:
1098 raise StopIteration
1099 return line
1100
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001101class FakeSocket(SharedSocketClient):
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001102
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001103 class _closedsocket:
1104 def __getattr__(self, name):
1105 raise error(9, 'Bad file descriptor')
1106
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001107 def __init__(self, sock, ssl):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001108 sock = SharedSocket(sock)
1109 SharedSocketClient.__init__(self, sock)
1110 self._ssl = ssl
1111
1112 def close(self):
1113 SharedSocketClient.close(self)
1114 self._sock = self.__class__._closedsocket()
Jeremy Hyltonbe4fcf12002-06-28 22:38:01 +00001115
1116 def makefile(self, mode, bufsize=None):
1117 if mode != 'r' and mode != 'rb':
1118 raise UnimplementedFileMode()
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001119 return SSLFile(self._shared, self._ssl, bufsize)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001120
Greg Steindd6eefb2000-07-18 09:09:48 +00001121 def send(self, stuff, flags = 0):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001122 return self._ssl.write(stuff)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001123
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001124 sendall = send
Andrew M. Kuchlinga3c0b932002-03-18 22:51:48 +00001125
Greg Steindd6eefb2000-07-18 09:09:48 +00001126 def recv(self, len = 1024, flags = 0):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001127 return self._ssl.read(len)
Guido van Rossum23acc951994-02-21 16:36:04 +00001128
Greg Steindd6eefb2000-07-18 09:09:48 +00001129 def __getattr__(self, attr):
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001130 return getattr(self._sock, attr)
Guido van Rossum09c8b6c1999-12-07 21:37:17 +00001131
Guido van Rossum806c2462007-08-06 23:33:07 +00001132 def close(self):
1133 SharedSocketClient.close(self)
1134 self._ssl = None
Guido van Rossum23acc951994-02-21 16:36:04 +00001135
Greg Stein5e0fa402000-06-26 08:28:01 +00001136class HTTPSConnection(HTTPConnection):
Greg Steindd6eefb2000-07-18 09:09:48 +00001137 "This class allows communication via SSL."
Greg Stein5e0fa402000-06-26 08:28:01 +00001138
Greg Steindd6eefb2000-07-18 09:09:48 +00001139 default_port = HTTPS_PORT
Greg Stein5e0fa402000-06-26 08:28:01 +00001140
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001141 def __init__(self, host, port=None, key_file=None, cert_file=None,
Guido van Rossumd59da4b2007-05-22 18:11:13 +00001142 strict=None, timeout=None):
1143 HTTPConnection.__init__(self, host, port, strict, timeout)
Jeremy Hylton7c75c992002-06-28 23:38:14 +00001144 self.key_file = key_file
1145 self.cert_file = cert_file
Greg Stein5e0fa402000-06-26 08:28:01 +00001146
Greg Steindd6eefb2000-07-18 09:09:48 +00001147 def connect(self):
1148 "Connect to a host on a given (SSL) port."
Greg Stein5e0fa402000-06-26 08:28:01 +00001149
Guido van Rossumd59da4b2007-05-22 18:11:13 +00001150 sock = socket.create_connection((self.host, self.port), self.timeout)
Martin v. Löwis1867f242003-06-14 13:30:53 +00001151 ssl = socket.ssl(sock, self.key_file, self.cert_file)
Greg Steindd6eefb2000-07-18 09:09:48 +00001152 self.sock = FakeSocket(sock, ssl)
Greg Stein5e0fa402000-06-26 08:28:01 +00001153
1154
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001155class HTTP:
Greg Steindd6eefb2000-07-18 09:09:48 +00001156 "Compatibility class with httplib.py from 1.5."
Greg Stein5e0fa402000-06-26 08:28:01 +00001157
Greg Steindd6eefb2000-07-18 09:09:48 +00001158 _http_vsn = 10
1159 _http_vsn_str = 'HTTP/1.0'
Greg Stein5e0fa402000-06-26 08:28:01 +00001160
Greg Steindd6eefb2000-07-18 09:09:48 +00001161 debuglevel = 0
Greg Stein5e0fa402000-06-26 08:28:01 +00001162
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001163 _connection_class = HTTPConnection
1164
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001165 def __init__(self, host='', port=None, strict=None):
Greg Steindd6eefb2000-07-18 09:09:48 +00001166 "Provide a default host, since the superclass requires one."
Greg Stein5e0fa402000-06-26 08:28:01 +00001167
Greg Steindd6eefb2000-07-18 09:09:48 +00001168 # some joker passed 0 explicitly, meaning default port
1169 if port == 0:
1170 port = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001171
Greg Steindd6eefb2000-07-18 09:09:48 +00001172 # Note that we may pass an empty string as the host; this will throw
1173 # an error when we attempt to connect. Presumably, the client code
1174 # will call connect before then, with a proper host.
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001175 self._setup(self._connection_class(host, port, strict))
Greg Stein5e0fa402000-06-26 08:28:01 +00001176
Greg Stein81937a42001-08-18 09:20:23 +00001177 def _setup(self, conn):
1178 self._conn = conn
1179
1180 # set up delegation to flesh out interface
1181 self.send = conn.send
1182 self.putrequest = conn.putrequest
1183 self.endheaders = conn.endheaders
1184 self.set_debuglevel = conn.set_debuglevel
1185
1186 conn._http_vsn = self._http_vsn
1187 conn._http_vsn_str = self._http_vsn_str
Greg Stein5e0fa402000-06-26 08:28:01 +00001188
Greg Steindd6eefb2000-07-18 09:09:48 +00001189 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001190
Greg Steindd6eefb2000-07-18 09:09:48 +00001191 def connect(self, host=None, port=None):
1192 "Accept arguments to set the host/port, since the superclass doesn't."
Greg Stein5e0fa402000-06-26 08:28:01 +00001193
Greg Steindd6eefb2000-07-18 09:09:48 +00001194 if host is not None:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001195 self._conn._set_hostport(host, port)
1196 self._conn.connect()
Greg Stein5e0fa402000-06-26 08:28:01 +00001197
Greg Steindd6eefb2000-07-18 09:09:48 +00001198 def getfile(self):
1199 "Provide a getfile, since the superclass' does not use this concept."
1200 return self.file
Greg Stein5e0fa402000-06-26 08:28:01 +00001201
Greg Steindd6eefb2000-07-18 09:09:48 +00001202 def putheader(self, header, *values):
1203 "The superclass allows only one value argument."
Guido van Rossum34735a62000-12-15 15:09:42 +00001204 self._conn.putheader(header, '\r\n\t'.join(values))
Greg Stein5e0fa402000-06-26 08:28:01 +00001205
Greg Steindd6eefb2000-07-18 09:09:48 +00001206 def getreply(self):
1207 """Compat definition since superclass does not define it.
Greg Stein5e0fa402000-06-26 08:28:01 +00001208
Greg Steindd6eefb2000-07-18 09:09:48 +00001209 Returns a tuple consisting of:
1210 - server status code (e.g. '200' if all goes well)
1211 - server "reason" corresponding to status code
1212 - any RFC822 headers in the response from the server
1213 """
1214 try:
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001215 response = self._conn.getresponse()
Guido van Rossumb940e112007-01-10 16:19:56 +00001216 except BadStatusLine as e:
Greg Steindd6eefb2000-07-18 09:09:48 +00001217 # keep the socket open (as a file), and return it
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001218 self.file = self._conn.sock.makefile('rb', 0)
Greg Stein5e0fa402000-06-26 08:28:01 +00001219
Greg Steindd6eefb2000-07-18 09:09:48 +00001220 # close our socket -- we want to restart after any protocol error
1221 self.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001222
Greg Steindd6eefb2000-07-18 09:09:48 +00001223 self.headers = None
1224 return -1, e.line, None
Greg Stein5e0fa402000-06-26 08:28:01 +00001225
Greg Steindd6eefb2000-07-18 09:09:48 +00001226 self.headers = response.msg
1227 self.file = response.fp
1228 return response.status, response.reason, response.msg
Greg Stein5e0fa402000-06-26 08:28:01 +00001229
Greg Steindd6eefb2000-07-18 09:09:48 +00001230 def close(self):
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001231 self._conn.close()
Greg Stein5e0fa402000-06-26 08:28:01 +00001232
Greg Steindd6eefb2000-07-18 09:09:48 +00001233 # note that self.file == response.fp, which gets closed by the
1234 # superclass. just clear the object ref here.
1235 ### hmm. messy. if status==-1, then self.file is owned by us.
1236 ### well... we aren't explicitly closing, but losing this ref will
1237 ### do it
1238 self.file = None
Greg Stein5e0fa402000-06-26 08:28:01 +00001239
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001240if hasattr(socket, 'ssl'):
1241 class HTTPS(HTTP):
1242 """Compatibility with 1.5 httplib interface
1243
1244 Python 1.5.2 did not have an HTTPS class, but it defined an
1245 interface for sending http requests that is also useful for
Tim Peters5ceadc82001-01-13 19:16:21 +00001246 https.
Jeremy Hylton29b8d5a2000-08-01 17:33:32 +00001247 """
1248
Martin v. Löwisd7bf9742000-09-21 22:09:47 +00001249 _connection_class = HTTPSConnection
Tim Peters5ceadc82001-01-13 19:16:21 +00001250
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001251 def __init__(self, host='', port=None, key_file=None, cert_file=None,
1252 strict=None):
Greg Stein81937a42001-08-18 09:20:23 +00001253 # provide a default host, pass the X509 cert info
1254
1255 # urf. compensate for bad input.
1256 if port == 0:
1257 port = None
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001258 self._setup(self._connection_class(host, port, key_file,
1259 cert_file, strict))
Greg Stein81937a42001-08-18 09:20:23 +00001260
1261 # we never actually use these for anything, but we keep them
1262 # here for compatibility with post-1.5.2 CVS.
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001263 self.key_file = key_file
1264 self.cert_file = cert_file
Greg Stein81937a42001-08-18 09:20:23 +00001265
Greg Stein5e0fa402000-06-26 08:28:01 +00001266
1267class HTTPException(Exception):
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001268 # Subclasses that define an __init__ must call Exception.__init__
1269 # or define self.args. Otherwise, str() will fail.
Greg Steindd6eefb2000-07-18 09:09:48 +00001270 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001271
1272class NotConnected(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001273 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001274
Skip Montanaro9d389972002-03-24 16:53:50 +00001275class InvalidURL(HTTPException):
1276 pass
1277
Greg Stein5e0fa402000-06-26 08:28:01 +00001278class UnknownProtocol(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001279 def __init__(self, version):
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001280 self.args = version,
Greg Steindd6eefb2000-07-18 09:09:48 +00001281 self.version = version
Greg Stein5e0fa402000-06-26 08:28:01 +00001282
1283class UnknownTransferEncoding(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001284 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001285
Greg Stein5e0fa402000-06-26 08:28:01 +00001286class UnimplementedFileMode(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001287 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001288
1289class IncompleteRead(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001290 def __init__(self, partial):
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001291 self.args = partial,
Greg Steindd6eefb2000-07-18 09:09:48 +00001292 self.partial = partial
Greg Stein5e0fa402000-06-26 08:28:01 +00001293
1294class ImproperConnectionState(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001295 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001296
1297class CannotSendRequest(ImproperConnectionState):
Greg Steindd6eefb2000-07-18 09:09:48 +00001298 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001299
1300class CannotSendHeader(ImproperConnectionState):
Greg Steindd6eefb2000-07-18 09:09:48 +00001301 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001302
1303class ResponseNotReady(ImproperConnectionState):
Greg Steindd6eefb2000-07-18 09:09:48 +00001304 pass
Greg Stein5e0fa402000-06-26 08:28:01 +00001305
1306class BadStatusLine(HTTPException):
Greg Steindd6eefb2000-07-18 09:09:48 +00001307 def __init__(self, line):
Jeremy Hylton12f4f352002-07-06 18:55:01 +00001308 self.args = line,
Greg Steindd6eefb2000-07-18 09:09:48 +00001309 self.line = line
Greg Stein5e0fa402000-06-26 08:28:01 +00001310
1311# for backwards compatibility
1312error = HTTPException
1313
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001314class LineAndFileWrapper:
1315 """A limited file-like object for HTTP/0.9 responses."""
1316
1317 # The status-line parsing code calls readline(), which normally
1318 # get the HTTP status line. For a 0.9 response, however, this is
1319 # actually the first line of the body! Clients need to get a
1320 # readable file object that contains that line.
1321
1322 def __init__(self, line, file):
1323 self._line = line
1324 self._file = file
1325 self._line_consumed = 0
1326 self._line_offset = 0
1327 self._line_left = len(line)
1328
1329 def __getattr__(self, attr):
1330 return getattr(self._file, attr)
1331
1332 def _done(self):
1333 # called when the last byte is read from the line. After the
1334 # call, all read methods are delegated to the underlying file
Skip Montanaro74b9a7a2003-02-25 17:48:15 +00001335 # object.
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001336 self._line_consumed = 1
1337 self.read = self._file.read
1338 self.readline = self._file.readline
1339 self.readlines = self._file.readlines
1340
1341 def read(self, amt=None):
Hye-Shik Chang39aef792004-06-05 13:30:56 +00001342 if self._line_consumed:
1343 return self._file.read(amt)
1344 assert self._line_left
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001345 if amt is None or amt > self._line_left:
1346 s = self._line[self._line_offset:]
1347 self._done()
1348 if amt is None:
1349 return s + self._file.read()
1350 else:
Tim Petersc411dba2002-07-16 21:35:23 +00001351 return s + self._file.read(amt - len(s))
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001352 else:
1353 assert amt <= self._line_left
1354 i = self._line_offset
1355 j = i + amt
1356 s = self._line[i:j]
1357 self._line_offset = j
1358 self._line_left -= amt
1359 if self._line_left == 0:
1360 self._done()
1361 return s
Tim Petersc411dba2002-07-16 21:35:23 +00001362
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001363 def readline(self):
Hye-Shik Chang39aef792004-06-05 13:30:56 +00001364 if self._line_consumed:
1365 return self._file.readline()
1366 assert self._line_left
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001367 s = self._line[self._line_offset:]
1368 self._done()
1369 return s
1370
1371 def readlines(self, size=None):
Hye-Shik Chang39aef792004-06-05 13:30:56 +00001372 if self._line_consumed:
1373 return self._file.readlines(size)
1374 assert self._line_left
Jeremy Hyltond46aa372002-07-06 18:48:07 +00001375 L = [self._line[self._line_offset:]]
1376 self._done()
1377 if size is None:
1378 return L + self._file.readlines()
1379 else:
1380 return L + self._file.readlines(size)
Greg Stein5e0fa402000-06-26 08:28:01 +00001381
Guido van Rossum23acc951994-02-21 16:36:04 +00001382def test():
Guido van Rossum41999c11997-12-09 00:12:23 +00001383 """Test this module.
1384
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001385 A hodge podge of tests collected here, because they have too many
1386 external dependencies for the regular test suite.
Guido van Rossum41999c11997-12-09 00:12:23 +00001387 """
Greg Stein5e0fa402000-06-26 08:28:01 +00001388
Guido van Rossum41999c11997-12-09 00:12:23 +00001389 import sys
1390 import getopt
1391 opts, args = getopt.getopt(sys.argv[1:], 'd')
1392 dl = 0
1393 for o, a in opts:
1394 if o == '-d': dl = dl + 1
1395 host = 'www.python.org'
1396 selector = '/'
1397 if args[0:]: host = args[0]
1398 if args[1:]: selector = args[1]
1399 h = HTTP()
1400 h.set_debuglevel(dl)
1401 h.connect(host)
1402 h.putrequest('GET', selector)
1403 h.endheaders()
Greg Stein5e0fa402000-06-26 08:28:01 +00001404 status, reason, headers = h.getreply()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001405 print('status =', status)
1406 print('reason =', reason)
Jeremy Hylton97043c32007-08-04 02:34:24 +00001407 print('read', len(h.getfile().read()))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001408 print()
Guido van Rossum41999c11997-12-09 00:12:23 +00001409 if headers:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001410 for header in headers.headers: print(header.strip())
1411 print()
Greg Stein5e0fa402000-06-26 08:28:01 +00001412
Jeremy Hylton8acf1e02002-03-08 19:35:51 +00001413 # minimal test that code to extract host from url works
1414 class HTTP11(HTTP):
1415 _http_vsn = 11
1416 _http_vsn_str = 'HTTP/1.1'
1417
1418 h = HTTP11('www.python.org')
1419 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1420 h.endheaders()
1421 h.getreply()
1422 h.close()
1423
Greg Stein5e0fa402000-06-26 08:28:01 +00001424 if hasattr(socket, 'ssl'):
Tim Petersc411dba2002-07-16 21:35:23 +00001425
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001426 for host, selector in (('sourceforge.net', '/projects/python'),
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001427 ):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001428 print("https://%s%s" % (host, selector))
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001429 hs = HTTPS()
Jeremy Hylton8531b1b2002-07-16 21:21:11 +00001430 hs.set_debuglevel(dl)
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001431 hs.connect(host)
1432 hs.putrequest('GET', selector)
1433 hs.endheaders()
1434 status, reason, headers = hs.getreply()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001435 print('status =', status)
1436 print('reason =', reason)
1437 print("read", len(hs.getfile().read()))
1438 print()
Jeremy Hylton29d27ac2002-07-09 21:22:36 +00001439 if headers:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001440 for header in headers.headers: print(header.strip())
1441 print()
Guido van Rossum23acc951994-02-21 16:36:04 +00001442
Guido van Rossum23acc951994-02-21 16:36:04 +00001443if __name__ == '__main__':
Guido van Rossum41999c11997-12-09 00:12:23 +00001444 test()