blob: 158e9678a56b9e4f931d4cd1ac72f852a0cda2ca [file] [log] [blame]
Joe Gregorio845a5452010-09-08 13:50:34 -04001from __future__ import generators
2"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
8Requires Python 2.3 or later
9
10Changelog:
112007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
12
13"""
14
15__author__ = "Joe Gregorio (joe@bitworking.org)"
16__copyright__ = "Copyright 2006, Joe Gregorio"
17__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
18 "James Antill",
19 "Xavier Verges Farrero",
20 "Jonathan Feinberg",
21 "Blair Zajac",
22 "Sam Ruby",
23 "Louis Nyffenegger"]
24__license__ = "MIT"
Joe Gregorioe7518002011-06-14 10:56:36 -040025__version__ = "0.7.0"
Joe Gregorio845a5452010-09-08 13:50:34 -040026
27import re
28import sys
29import email
30import email.Utils
31import email.Message
32import email.FeedParser
33import StringIO
34import gzip
35import zlib
36import httplib
37import urlparse
38import base64
39import os
40import copy
41import calendar
42import time
43import random
44import errno
# remove deprecated warning in python2.6
46try:
47 from hashlib import sha1 as _sha, md5 as _md5
48except ImportError:
49 import sha
50 import md5
51 _sha = sha.new
52 _md5 = md5.new
53import hmac
54from gettext import gettext as _
55import socket
56
57try:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040058 from httplib2 import socks
Joe Gregorio845a5452010-09-08 13:50:34 -040059except ImportError:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040060 socks = None
Joe Gregorio845a5452010-09-08 13:50:34 -040061
# Build the appropriate socket wrapper for ssl
try:
    import ssl # python 2.6
    ssl_SSLError = ssl.SSLError
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Wrap *sock* with SSL, optionally verifying the peer against ca_certs.

        When disable_validation is true no certificate checking is done;
        otherwise the peer must present a certificate chaining to ca_certs.
        """
        if disable_validation:
            cert_reqs = ssl.CERT_NONE
        else:
            cert_reqs = ssl.CERT_REQUIRED
        # We should be specifying SSL version 3 or TLS v1, but the ssl module
        # doesn't expose the necessary knobs. So we need to go with the default
        # of SSLv23.
        return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
                               cert_reqs=cert_reqs, ca_certs=ca_certs)
except (AttributeError, ImportError):
    # Pre-2.6 Python (or an ssl module without wrap_socket): fall back to
    # the old socket.ssl API, which cannot validate certificates at all.
    ssl_SSLError = None
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Fallback SSL wrapper; only usable when validation is disabled."""
        if not disable_validation:
            raise CertificateValidationUnsupported(
                    "SSL certificate validation is not supported without "
                    "the ssl module installed. To avoid this error, install "
                    "the ssl module, or explicity disable validation.")
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
89
# On Python 2.3+ use the real IRI-to-URI converter from the sibling module;
# on older Pythons fall back to an identity function.
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        # Best effort: pass the value through unchanged.
        return uri
95
def has_timeout(timeout): # python 2.6
    """Return True when *timeout* is an explicit, caller-supplied value.

    Both None and the socket module's global-default sentinel (present
    from Python 2.6 onward) mean "no specific timeout was requested".
    """
    if timeout is None:
        return False
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    return timeout is not sentinel
100
# Names exported by "from httplib2 import *".
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel', 'ProxiesUnavailableError']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0


# Python 2.3 support: provide a sorted() builtin (new in 2.4).
# NOTE: unlike the real builtin, this shim sorts the sequence in place
# and accepts no key/cmp/reverse arguments.
if sys.version_info < (2,4):
    def sorted(seq):
        seq.sort()
        return seq
116
117# Python 2.3 support
# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    if self.msg is None:
        # Mirrors httplib's own behavior when headers aren't available yet.
        raise httplib.ResponseNotReady()
    return self.msg.items()

# Older httplib.HTTPResponse lacks getheaders(); patch it in when missing.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
126
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        # Keep the offending response and body so callers can recover them.
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)

class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

class MalformedHeader(HttpLib2Error): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
class ProxiesUnavailableError(HttpLib2Error): pass
class CertificateValidationUnsupported(HttpLib2Error): pass
class SSLHandshakeError(HttpLib2Error): pass
class CertificateHostnameMismatch(SSLHandshakeError):
    def __init__(self, desc, host, cert):
        HttpLib2Error.__init__(self, desc)
        # The hostname that was requested and the certificate that failed
        # to match it, for diagnostic use by callers.
        self.host = host
        self.cert = cert
Joe Gregorio845a5452010-09-08 13:50:34 -0400155
156# Open Items:
157# -----------
158# Proxy support
159
160# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
161
162# Pluggable cache storage (supports storing the cache in
163# flat files by default. We need a plug-in architecture
164# that can support Berkeley DB and Squid)
165
166# == Known Issues ==
167# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
168# Does not handle Cache-Control: max-stale
169# Does not use Age: headers when calculating cache freshness.
170
171
# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

# Default CA certificates file bundled with httplib2, resolved relative
# to this module so it works regardless of the current working directory.
CA_CERTS = os.path.join(
        os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
181
Joe Gregorio845a5452010-09-08 13:50:34 -0400182# Which headers are hop-by-hop headers by default
183HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
184
185def _get_end2end_headers(response):
186 hopbyhop = list(HOP_BY_HOP)
187 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
188 return [header for header in response.keys() if header not in hopbyhop]
189
# Appendix B of RFC 3986: the standard URI-splitting regex.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None (or '' for an empty authority).
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])

def urlnorm(uri):
    """Normalize *uri* for use as a cache key.

    Lower-cases the scheme and authority (both case-insensitive per
    RFC 3986), defaults an empty path to "/", and drops the fragment.

    Returns (scheme, authority, request_uri, defrag_uri).
    Raises RelativeURIError for a URI without scheme or authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    # Fixed: scheme was lower-cased twice; once is enough.
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
214
215
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')   # leading "scheme://" of an absolute URI
re_slash = re.compile(r'[?/:|]+')        # characters unsafe in a filename

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """

    try:
        # For absolute URIs, IDNA-encode so international hostnames
        # yield ASCII-safe filenames.
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        # Not IDNA-encodable; fall through and use the name as given.
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    # Hash the full, untruncated name first so the length limit below
    # cannot cause two long names to collide.
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename)>200:
        filename=filename[:200]
    return ",".join((filename, filemd5))
246
247NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
248def _normalize_headers(headers):
249 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
250
251def _parse_cache_control(headers):
252 retval = {}
253 if headers.has_key('cache-control'):
254 parts = headers['cache-control'].split(',')
255 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
256 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
257 retval = dict(parts_with_args + parts_wo_args)
258 return retval
259
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, usefull for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
#    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
#    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
#    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme.

    Each inner dict maps lower-cased auth-param names to their
    (unquoted) values.  Raises MalformedHeader when the header value
    cannot be split into scheme and parameters.
    """
    retval = {}
    if headers.has_key(headername):
        try:
            authenticate = headers[headername].strip()
            www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
            while authenticate:
                # Break off the scheme at the beginning of the line
                if headername == 'authentication-info':
                    # Authentication-Info carries no scheme name; it is
                    # only ever sent for Digest.
                    (auth_scheme, the_rest) = ('digest', authenticate)
                else:
                    (auth_scheme, the_rest) = authenticate.split(" ", 1)
                # Now loop over all the key value pairs that come after the scheme,
                # being careful not to roll into the next scheme
                match = www_auth.search(the_rest)
                auth_params = {}
                while match:
                    if match and len(match.groups()) == 3:
                        (key, value, the_rest) = match.groups()
                        # Undo quoted-pair escapes (\x -> x) in the value.
                        auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                        match = www_auth.search(the_rest)
                retval[auth_scheme.lower()] = auth_params
                authenticate = the_rest.strip()
        except ValueError:
            # split(" ", 1) failed: scheme with no parameters at all.
            raise MalformedHeader("WWW-Authenticate")
    return retval
302
303
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of the strings "FRESH", "STALE" or "TRANSPARENT".
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        # HTTP/1.0 Pragma: no-cache is upgraded to an equivalent
        # Cache-Control directive on the outgoing request.
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # Freshness math: compare the entry's age against its lifetime.
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides whatever the response said.
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh: the client wants the entry to still be fresh that
        # many seconds from now, so pad the current age by it.
        if cc.has_key('min-fresh'):
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
377
378def _decompressContent(response, new_content):
379 content = new_content
380 try:
381 encoding = response.get('content-encoding', None)
382 if encoding in ['gzip', 'deflate']:
383 if encoding == 'gzip':
384 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
385 if encoding == 'deflate':
386 content = zlib.decompress(content)
387 response['content-length'] = str(len(content))
388 # Record the historical presence of the encoding in a way the won't interfere.
389 response['-content-encoding'] = response['content-encoding']
390 del response['content-encoding']
391 except IOError:
392 content = ""
393 raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
394 return content
395
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or purge) a response in *cache* under *cachekey*.

    If either side sent Cache-Control: no-store, any existing entry is
    deleted.  Otherwise the cached text is a synthesized status line,
    the response headers (CRLF-normalized, minus status/encoding
    headers), and the raw body, concatenated.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # A 304 means the cached entry was still good, so store it
            # as the 200 it stands in for.
            status = response_headers.status
            if status == 304:
                status = 200

            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize any lone CR or LF to CRLF line endings.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
432
def _cnonce():
    """Return a fresh 16-hex-character client nonce.

    Hashes the current time together with 20 random decimal digits.
    Fixed: randrange's upper bound is exclusive, so the previous
    randrange(0, 9) could never select the digit '9'.
    """
    dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 10)] for i in range(20)])).hexdigest()
    return dig[:16]
436
def _wsse_username_token(cnonce, iso_now, password):
    """Compute the WSSE PasswordDigest: Base64(SHA1(nonce + created + password))."""
    raw = "%s%s%s" % (cnonce, iso_now, password)
    return base64.b64encode(_sha(raw).digest()).strip()
439
440
# For credentials we need two things, first
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
# Then we also need a list of URIs that have already demanded authentication
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
448
class Authentication(object):
    """Base class for the per-scheme authorization handlers.

    A handler remembers the host and path of the URI that demanded
    authentication and can report whether a later request falls within
    that scope.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        # Number of path levels the URI sits below this handler's path.
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return TRUE if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
480
481
482
class BasicAuthentication(Authentication):
    """HTTP Basic access authentication: base64 of "username:password"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(userpass).strip()
491
492
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        # Only qop="auth" is supported; anything else is rejected below.
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 = username:realm:password (RFC 2617 terminology).
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        # nc = nonce count, incremented on every request we sign.
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        # H and KD are the hash helpers named in RFC 2617 section 3.2.1.
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        # cnonce parameter exists so tests can pass a fixed value.
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            # No Authentication-Info: check for a stale-nonce challenge,
            # which means we should retry with the new nonce.
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            # Server may hand us the next nonce to use proactively.
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
548
549
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above.

    Implements the experimental HMACDigest scheme: requests are signed
    with an HMAC keyed by a hash of the user's password, salt and realm.
    """
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # Fixed: self.pwhashmod is a hash *constructor* (hashlib.md5/sha1
        # or the old md5.new/sha.new functions), not a module, so it has
        # no .new attribute -- calling .new() here always raised
        # AttributeError.  Call the constructor directly instead.
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        # 'integrity'/'stale' mean the request should be retried with a
        # fresh signature.
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
610
611
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'WSSE profile="UsernameToken"'
        # The token is a SHA1 digest of (nonce + creation time + password),
        # carried in the separate X-WSSE header.
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                password_digest,
                cnonce,
                iso_now)
635
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin authentication.

    On construction, exchanges the user's email/password for an Auth
    token via a POST to the ClientLogin endpoint, then replays that
    token on every request.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # Response body is newline-separated key=value pairs.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login refused; leave the token empty rather than failing here.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
663
664
# Maps lower-cased scheme names (as parsed from WWW-Authenticate
# challenges) to the Authentication subclass implementing them.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Order in which schemes are tried when a server offers more than one.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
674
class FileCache(object):
    """Store cache entries as individual files in a local directory.

    Not really safe to use when multiple threads or processes share
    the same cache directory.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached value for *key*, or None when absent/unreadable."""
        path = os.path.join(self.cache, self.safe(key))
        try:
            f = file(path, "rb")
            data = f.read()
            f.close()
        except IOError:
            data = None
        return data

    def set(self, key, value):
        """Write *value* for *key*, replacing any previous entry."""
        f = file(os.path.join(self.cache, self.safe(key)), "wb")
        f.write(value)
        f.close()

    def delete(self, key):
        """Remove the entry for *key* if one exists."""
        path = os.path.join(self.cache, self.safe(key))
        if os.path.exists(path):
            os.remove(path)
707
class Credentials(object):
    """A simple store of (domain, name, password) triples.

    An empty domain ("") acts as a wildcard that matches any domain.
    """
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" = every domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*, in insertion order."""
        for stored_domain, name, password in self.credentials:
            if stored_domain == "" or stored_domain == domain:
                yield (name, password)
722
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    # Inherits add()/clear()/iter() unchanged; callers pass (key, cert, domain).
    pass
727
728
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
constants. For example:

p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return the settings as the 6-tuple expected by socks setproxy()."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when both a host and a port were supplied."""
        return (self.proxy_host is not None) and (self.proxy_port is not None)
745
746
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """
    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        # Optional ProxyInfo; when set, connections go through socks.
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                'Proxy support missing but proxy use was requested!')
        msg = "getaddrinfo returns an empty list"
        # Try each resolved address until one connects.
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)

                self.sock.connect(sa)
            except socket.error, msg:
                # This address failed; close the socket and try the next one.
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every address failed; re-raise the last socket error.
            raise socket.error, msg
797
798class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
Joe Gregoriof8434792011-05-03 09:11:49 -0400799 """
800 This class allows communication via SSL.
Joe Gregorio845a5452010-09-08 13:50:34 -0400801
Joe Gregoriof8434792011-05-03 09:11:49 -0400802 All timeouts are in seconds. If None is passed for timeout then
803 Python's default timeout for sockets will be used. See for example
804 the docs of socket.setdefaulttimeout():
805 http://docs.python.org/library/socket.html#socket.setdefaulttimeout
806 """
Joe Gregorio845a5452010-09-08 13:50:34 -0400807 def __init__(self, host, port=None, key_file=None, cert_file=None,
Joe Gregorioe7518002011-06-14 10:56:36 -0400808 strict=None, timeout=None, proxy_info=None,
809 ca_certs=None, disable_ssl_certificate_validation=False):
Joe Gregorio845a5452010-09-08 13:50:34 -0400810 httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
811 cert_file=cert_file, strict=strict)
812 self.timeout = timeout
813 self.proxy_info = proxy_info
Joe Gregorioe7518002011-06-14 10:56:36 -0400814 if ca_certs is None:
815 ca_certs = CA_CERTS
816 self.ca_certs = ca_certs
817 self.disable_ssl_certificate_validation = \
818 disable_ssl_certificate_validation
819
820 # The following two methods were adapted from https_wrapper.py, released
821 # with the Google Appengine SDK at
822 # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
823 # under the following license:
824 #
825 # Copyright 2007 Google Inc.
826 #
827 # Licensed under the Apache License, Version 2.0 (the "License");
828 # you may not use this file except in compliance with the License.
829 # You may obtain a copy of the License at
830 #
831 # http://www.apache.org/licenses/LICENSE-2.0
832 #
833 # Unless required by applicable law or agreed to in writing, software
834 # distributed under the License is distributed on an "AS IS" BASIS,
835 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
836 # See the License for the specific language governing permissions and
837 # limitations under the License.
838 #
839
840 def _GetValidHostsForCert(self, cert):
841 """Returns a list of valid host globs for an SSL certificate.
842
843 Args:
844 cert: A dictionary representing an SSL certificate.
845 Returns:
846 list: A list of valid host globs.
847 """
848 if 'subjectAltName' in cert:
849 return [x[1] for x in cert['subjectAltName']
850 if x[0].lower() == 'dns']
851 else:
852 return [x[0][1] for x in cert['subject']
853 if x[0][0].lower() == 'commonname']
854
855 def _ValidateCertificateHostname(self, cert, hostname):
856 """Validates that a given hostname is valid for an SSL certificate.
857
858 Args:
859 cert: A dictionary representing an SSL certificate.
860 hostname: The hostname to test.
861 Returns:
862 bool: Whether or not the hostname is valid for this certificate.
863 """
864 hosts = self._GetValidHostsForCert(cert)
865 for host in hosts:
866 host_re = host.replace('.', '\.').replace('*', '[^.]*')
867 if re.search('^%s$' % (host_re,), hostname, re.I):
868 return True
869 return False
Joe Gregorio845a5452010-09-08 13:50:34 -0400870
871 def connect(self):
872 "Connect to a host on a given (SSL) port."
873
Joe Gregoriof8434792011-05-03 09:11:49 -0400874 msg = "getaddrinfo returns an empty list"
875 for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo(
876 self.host, self.port, 0, socket.SOCK_STREAM):
877 try:
878 if self.proxy_info and self.proxy_info.isgood():
879 sock = socks.socksocket(family, socktype, proto)
880 sock.setproxy(*self.proxy_info.astuple())
881 else:
882 sock = socket.socket(family, socktype, proto)
883 sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
Joe Gregorio845a5452010-09-08 13:50:34 -0400884
Joe Gregoriof8434792011-05-03 09:11:49 -0400885 if has_timeout(self.timeout):
886 sock.settimeout(self.timeout)
887 sock.connect((self.host, self.port))
Joe Gregorioe7518002011-06-14 10:56:36 -0400888 self.sock =_ssl_wrap_socket(
889 sock, self.key_file, self.cert_file,
890 self.disable_ssl_certificate_validation, self.ca_certs)
Joe Gregoriof8434792011-05-03 09:11:49 -0400891 if self.debuglevel > 0:
892 print "connect: (%s, %s)" % (self.host, self.port)
Joe Gregorioe7518002011-06-14 10:56:36 -0400893 if not self.disable_ssl_certificate_validation:
894 cert = self.sock.getpeercert()
895 hostname = self.host.split(':', 0)[0]
896 if not self._ValidateCertificateHostname(cert, hostname):
897 raise CertificateHostnameMismatch(
898 'Server presented certificate that does not match '
899 'host %s: %s' % (hostname, cert), hostname, cert)
900 except ssl_SSLError, e:
901 if sock:
902 sock.close()
903 if self.sock:
904 self.sock.close()
905 self.sock = None
906 # Unfortunately the ssl module doesn't seem to provide any way
907 # to get at more detailed error information, in particular
908 # whether the error is due to certificate validation or
909 # something else (such as SSL protocol mismatch).
910 if e.errno == ssl.SSL_ERROR_SSL:
911 raise SSLHandshakeError(e)
912 else:
913 raise
914 except (socket.timeout, socket.gaierror):
915 raise
Joe Gregoriof8434792011-05-03 09:11:49 -0400916 except socket.error, msg:
917 if self.debuglevel > 0:
918 print 'connect fail:', (self.host, self.port)
919 if self.sock:
920 self.sock.close()
921 self.sock = None
922 continue
923 break
924 if not self.sock:
925 raise socket.error, msg
Joe Gregorio845a5452010-09-08 13:50:34 -0400926
Joe Gregorioe7518002011-06-14 10:56:36 -0400927SCHEME_TO_CONNECTION = {
928 'http': HTTPConnectionWithTimeout,
929 'https': HTTPSConnectionWithTimeout
930 }
931
# Use a different connection object for Google App Engine
try:
    from google.appengine.api.urlfetch import fetch
    from google.appengine.api.urlfetch import InvalidURLError
    from google.appengine.api.urlfetch import DownloadError
    from google.appengine.api.urlfetch import ResponseTooLargeError
    from google.appengine.api.urlfetch import SSLCertificateError


    class ResponseDict(dict):
        """Is a dictionary that also has a read() method, so
        that it can pass itself off as an httplib.HTTPResponse()."""
        def read(self):
            # Placeholder only; AppEngineHttpConnection.request() replaces
            # this with a closure over the actual urlfetch response body.
            pass


    class AppEngineHttpConnection(object):
        """Emulates an httplib.HTTPConnection object, but actually uses the Google
        App Engine urlfetch library. This allows the timeout to be properly used on
        Google App Engine, and avoids using httplib, which on Google App Engine is
        just another wrapper around urlfetch.
        """
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None, ca_certs=None,
                     disable_certificate_validation=False):
            self.host = host
            self.port = port
            self.timeout = timeout
            # urlfetch cannot use client certs, proxies, or custom CA
            # bundles; refuse rather than silently ignore them.
            if key_file or cert_file or proxy_info or ca_certs:
                raise NotSupportedOnThisPlatform()
            self.response = None
            self.scheme = 'http'
            self.validate_certificate = not disable_certificate_validation
            # Http._conn_request() checks conn.sock before connecting;
            # True makes it skip the (no-op) connect step.
            self.sock = True

        def request(self, method, url, body, headers):
            # Calculate the absolute URI, which fetch requires
            netloc = self.host
            if self.port:
                netloc = '%s:%s' % (self.host, self.port)
            absolute_uri = '%s://%s%s' % (self.scheme, netloc, url)
            try:
                response = fetch(absolute_uri, payload=body, method=method,
                    headers=headers, allow_truncated=False, follow_redirects=False,
                    deadline=self.timeout,
                    validate_certificate=self.validate_certificate)
                self.response = ResponseDict(response.headers)
                self.response['status'] = response.status_code
                setattr(self.response, 'read', lambda : response.content)

            # Make sure the exceptions raised match the exceptions expected.
            except InvalidURLError:
                raise socket.gaierror('')
            except (DownloadError, ResponseTooLargeError, SSLCertificateError):
                raise httplib.HTTPException()

        def getresponse(self):
            # Returns the ResponseDict built by request().
            return self.response

        def set_debuglevel(self, level):
            # Debug tracing is not supported by urlfetch; accepted for
            # httplib interface compatibility.
            pass

        def connect(self):
            # urlfetch has no persistent connection to open.
            pass

        def close(self):
            # urlfetch has no persistent connection to close.
            pass


    class AppEngineHttpsConnection(AppEngineHttpConnection):
        """Same as AppEngineHttpConnection, but for HTTPS URIs."""
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None):
            AppEngineHttpConnection.__init__(self, host, port, key_file, cert_file,
                strict, timeout, proxy_info)
            self.scheme = 'https'

    # Update the connection classes to use the Google App Engine specific ones.
    SCHEME_TO_CONNECTION = {
        'http': AppEngineHttpConnection,
        'https': AppEngineHttpsConnection
        }

except ImportError:
    # Not running on Google App Engine; keep the default connection classes.
    pass
Joe Gregorio845a5452010-09-08 13:50:34 -04001017
1018
class Http(object):
    """An HTTP client that handles:
- all methods
- caching
- ETags
- compression,
- HTTPS
- Basic
- Digest
- WSSE

and more.
    """
    def __init__(self, cache=None, timeout=None, proxy_info=None,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        """
        The value of proxy_info is a ProxyInfo instance.

        If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation.  By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = \
                disable_ssl_certificate_validation

        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, basestring):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, cached ETags are ignored (no If-None-Match/If-Match sent).
        self.ignore_etag = False

        # If True, request() converts exceptions into synthetic 4xx/5xx
        # (response, content) pairs instead of raising.
        self.force_exception_to_status_code = False

        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests, trying each stored credential
        against each supported auth scheme in AUTH_SCHEME_ORDER.
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Issue one request on `conn`, retrying once on a dropped
        connection.  Returns (Response, decompressed content string)."""
        # Two attempts: the second handles a stale keep-alive connection
        # that the server closed between requests.
        for i in range(2):
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except ssl_SSLError:
                conn.close()
                raise
            except socket.error, e:
                err = 0
                if hasattr(e, 'args'):
                    err = getattr(e, 'args')[0]
                else:
                    err = e.errno
                if err == errno.ECONNREFUSED: # Connection refused
                    raise
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if conn.sock is None:
                    if i == 0:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                pass
            try:
                response = conn.getresponse()
            except (socket.error, httplib.HTTPException):
                # Reading the response failed; retry once on a fresh
                # connection, otherwise propagate.
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = ""
                if method == "HEAD":
                    # A HEAD response has no body to read.
                    response.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)
                break
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-path) authorization in scope.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # e.g. a stale Digest nonce: re-sign and retry once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one is accepted.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Permanent redirects are cacheable (RFC 2616 10.3.2).
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # De-facto behavior: redirect 302/303 as GET
                            # and drop the request body.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        # Lower-case header names (and other normalization) via the
        # module-level helper of the same name.
        return _normalize_headers(headers)

# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST, DELETE,
        etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            domain_port = authority.split(":")[0:2]
            # Treat http on port 443 as https.
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            # Reuse one connection per scheme+authority pair.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                                authority, key_file=certs[0][0],
                                cert_file=certs[0][1], timeout=self.timeout,
                                proxy_info=self.proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                    else:
                        conn = self.connections[conn_key] = connection_type(
                                authority, timeout=self.timeout,
                                proxy_info=self.proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                else:
                    conn = self.connections[conn_key] = connection_type(
                            authority, timeout=self.timeout,
                            proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            # Don't advertise compression on range requests, since the
            # range would apply to the compressed representation.
            if 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                    try:
                        # Cache entries are "headers\r\n\r\nbody".
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        # Malformed cache entry; discard it.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary = info['vary']
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            # Cache entry vanished between get() and here.
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                # Nothing usable in the cache; honor only-if-cached or go
                # to the network.
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the exception into a synthetic response instead
                # of propagating it.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)
1465
1466
1467
1468class Response(dict):
1469 """An object more like email.Message than httplib.HTTPResponse."""
1470
1471 """Is this response from our local cache"""
1472 fromcache = False
1473
1474 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
1475 version = 11
1476
1477 "Status code returned by server. "
1478 status = 200
1479
1480 """Reason phrase returned by server."""
1481 reason = "Ok"
1482
1483 previous = None
1484
1485 def __init__(self, info):
1486 # info is either an email.Message or
1487 # an httplib.HTTPResponse object.
1488 if isinstance(info, httplib.HTTPResponse):
1489 for key, value in info.getheaders():
1490 self[key.lower()] = value
1491 self.status = info.status
1492 self['status'] = str(self.status)
1493 self.reason = info.reason
1494 self.version = info.version
1495 elif isinstance(info, email.Message.Message):
1496 for key, value in info.items():
1497 self[key] = value
1498 self.status = int(self['status'])
1499 else:
1500 for key, value in info.iteritems():
1501 self[key] = value
1502 self.status = int(self.get('status', self.status))
1503
1504
1505 def __getattr__(self, name):
1506 if name == 'dict':
1507 return self
1508 else:
1509 raise AttributeError, name