blob: 64f2e17e7b5520bf4c94eac581588d3909a3d680 [file] [log] [blame]
Joe Gregorio845a5452010-09-08 13:50:34 -04001from __future__ import generators
2"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
8Requires Python 2.3 or later
9
10Changelog:
112007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
12
13"""
14
15__author__ = "Joe Gregorio (joe@bitworking.org)"
16__copyright__ = "Copyright 2006, Joe Gregorio"
17__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
18 "James Antill",
19 "Xavier Verges Farrero",
20 "Jonathan Feinberg",
21 "Blair Zajac",
22 "Sam Ruby",
23 "Louis Nyffenegger"]
24__license__ = "MIT"
25__version__ = "$Rev$"
26
27import re
28import sys
29import email
30import email.Utils
31import email.Message
32import email.FeedParser
33import StringIO
34import gzip
35import zlib
36import httplib
37import urlparse
38import base64
39import os
40import copy
41import calendar
42import time
43import random
44import errno
# remove deprecated warning in python2.6
46try:
47 from hashlib import sha1 as _sha, md5 as _md5
48except ImportError:
49 import sha
50 import md5
51 _sha = sha.new
52 _md5 = md5.new
53import hmac
54from gettext import gettext as _
55import socket
56
57try:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040058 from httplib2 import socks
Joe Gregorio845a5452010-09-08 13:50:34 -040059except ImportError:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040060 socks = None
Joe Gregorio845a5452010-09-08 13:50:34 -040061
# Build the appropriate socket wrapper for ssl
try:
    import ssl # python 2.6
    _ssl_wrap_socket = ssl.wrap_socket
except (AttributeError, ImportError):
    # Pre-2.6 fallback: no 'ssl' module, so wrap the socket with the
    # legacy socket.ssl()/httplib.FakeSocket pair instead.
    def _ssl_wrap_socket(sock, key_file, cert_file):
        """Wrap 'sock' in SSL using the pre-2.6 API; returns a file-like socket."""
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
70
71
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    # Python 2.2 and earlier: no real IRI support, pass URIs through untouched.
    def iri2uri(uri):
        return uri
77
def has_timeout(timeout): # python 2.6
    """Return True when 'timeout' is a real timeout value.

    None and the Python 2.6 module-default sentinel
    (socket._GLOBAL_DEFAULT_TIMEOUT) both mean "no explicit timeout".
    """
    if timeout is None:
        return False
    # The sentinel only exists on Python >= 2.6; on older versions any
    # non-None value counts as a timeout.
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    return timeout is not sentinel
82
# Public API of the module; star-imports pick up exactly these names.
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel', 'ProxiesUnavailableError']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0
91
92
# Python 2.3 support
if sys.version_info < (2,4):
    def sorted(seq):
        """Minimal stand-in for the sorted() builtin added in Python 2.4.

        Copies the input first so the caller's sequence is not mutated and
        arbitrary iterables are accepted, matching the real builtin.
        """
        seq = list(seq)
        seq.sort()
        return seq
98
# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples.

    Raises httplib.ResponseNotReady if the response headers have not
    been read yet.
    """
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

# httplib.HTTPResponse only gained getheaders() in Python 2.4; patch it in
# on older interpreters so the rest of this module can rely on it.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
108
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    """Base class for every exception this module raises."""
    pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An error that also carries the response and body that caused it,
    so a caller may choose to turn it back into a (response, content) pair."""
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content
119
# 3xx response arrived without a Location header to follow.
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
# Followed more redirects than the configured maximum.
class RedirectLimit(HttpLib2ErrorWithResponse): pass
# Body claimed gzip/deflate encoding but could not be decoded.
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
# Server requested a Digest option this client does not implement.
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
# Server requested an HMACDigest option this client does not implement.
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

# A header could not be parsed (e.g. bad WWW-Authenticate).
class MalformedHeader(HttpLib2Error): pass
# Caller passed a relative URI where an absolute one is required.
class RelativeURIError(HttpLib2Error): pass
# DNS resolution of the target host failed.
class ServerNotFoundError(HttpLib2Error): pass
# Proxy use requested but the socks module is not available.
class ProxiesUnavailableError(HttpLib2Error): pass
Joe Gregorio845a5452010-09-08 13:50:34 -0400130
131# Open Items:
132# -----------
133# Proxy support
134
135# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
136
137# Pluggable cache storage (supports storing the cache in
138# flat files by default. We need a plug-in architecture
139# that can support Berkeley DB and Squid)
140
141# == Known Issues ==
142# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
143# Does not handle Cache-Control: max-stale
144# Does not use Age: headers when calculating cache freshness.
145
146
# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5
152
153# Which headers are hop-by-hop headers by default
154HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
155
156def _get_end2end_headers(response):
157 hopbyhop = list(HOP_BY_HOP)
158 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
159 return [header for header in response.keys() if header not in hopbyhop]
160
# Appendix B of RFC 3986: one regex that splits any URI reference.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None (path as '').
    """
    # Groups 1/3/6/8 are the delimiter-bearing wrappers; the payloads are
    # at indices 1, 3, 4, 6 and 8 of groups().
    (_, scheme, _, authority, path, _, query, _, fragment) = URI.match(uri).groups()
    return (scheme, authority, path, query, fragment)
170
def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri) where scheme and
    authority are lowercased, an empty path becomes "/", and defrag_uri is
    the URI with any fragment removed.

    Raises RelativeURIError if the URI has no scheme or no authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    # Lowercase the scheme exactly once (the original code did this twice).
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
185
186
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.

    The result is "<sanitized-uri>,<md5-of-full-uri>", with the sanitized
    part capped at 200 characters so the digest keeps keys unique.
    """

    try:
        if re_url_scheme.match(filename):
            # NOTE(review): IDNA-encodes the *entire* URL, not just the host
            # part; a non-ASCII path makes encode('idna') raise UnicodeError,
            # which is silently swallowed below -- confirm intended.
            if isinstance(filename,str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    # Digest is taken over the full (scheme-bearing) URL, before stripping.
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename)>200:
        filename=filename[:200]
    return ",".join((filename, filemd5))
217
218NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
219def _normalize_headers(headers):
220 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
221
222def _parse_cache_control(headers):
223 retval = {}
224 if headers.has_key('cache-control'):
225 parts = headers['cache-control'].split(',')
226 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
227 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
228 retval = dict(parts_with_args + parts_wo_args)
229 return retval
230
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, usefull for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme.

    E.g. {'digest': {'realm': '...', 'nonce': '...'}, 'basic': {...}}.
    Raises MalformedHeader when the header value cannot be split into
    scheme + parameters.
    """
    retval = {}
    if headers.has_key(headername):
        try:
            authenticate = headers[headername].strip()
            www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
            while authenticate:
                # Break off the scheme at the beginning of the line
                if headername == 'authentication-info':
                    # Authentication-Info carries no scheme name; it is
                    # implicitly Digest (RFC 2617 section 3.2.3).
                    (auth_scheme, the_rest) = ('digest', authenticate)
                else:
                    (auth_scheme, the_rest) = authenticate.split(" ", 1)
                # Now loop over all the key value pairs that come after the scheme,
                # being careful not to roll into the next scheme
                match = www_auth.search(the_rest)
                auth_params = {}
                while match:
                    if match and len(match.groups()) == 3:
                        (key, value, the_rest) = match.groups()
                        # Undo quoted-pair escaping ('\x' -> 'x') in values.
                        auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                    match = www_auth.search(the_rest)
                retval[auth_scheme.lower()] = auth_params
                authenticate = the_rest.strip()
        except ValueError:
            # authenticate.split(" ", 1) failed: no parameters after scheme.
            raise MalformedHeader("WWW-Authenticate")
    return retval
273
274
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of "FRESH" (cached copy may be served), "STALE" (must be
    revalidated or refetched) or "TRANSPARENT" (bypass the cache).

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # HTTP/1.0-style Pragma: no-cache bypasses the cache; upgrade it to the
    # HTTP/1.1 equivalent on the outgoing request.
    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # Freshness lifetime: response max-age wins over Expires; absent
        # both it is 0. A request-side max-age overrides either.
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            # min-fresh: client wants the entry to remain fresh for at
            # least this many more seconds, so age it forward first.
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
348
def _decompressContent(response, new_content):
    """Decompress a gzip- or deflate-encoded response body.

    On success returns the decoded bytes, fixes up 'content-length', and
    renames 'content-encoding' to '-content-encoding' so later processing
    is not confused. Returns the body unchanged for other encodings.

    Raises FailedToDecompressContent when the body claims an encoding it
    does not actually have.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            # Record the historical presence of the encoding in a way the won't interfere.
            response['-content-encoding'] = response['content-encoding']
            del response['content-encoding']
    except (IOError, zlib.error):
        # Bug fix: corrupt deflate data raises zlib.error, not IOError, so
        # the original 'except IOError' let it escape instead of converting
        # it into FailedToDecompressContent like gzip failures.
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content
366
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) the response under 'cachekey' in 'cache'.

    Honors no-store on either side by deleting any existing entry.
    Otherwise serializes "status line + headers + body" as a single cache
    value, annotating which request headers the response varied on.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            # Hop-by-hop/bookkeeping headers are not part of the cached entity.
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # NOTE(review): response_headers is expected to be a Response
            # object here (attribute access), not a plain dict.
            status = response_headers.status
            if status == 304:
                # A 304 means the cached entity is still valid, so store it
                # as a normal 200 for future cache hits.
                status = 200

            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize bare CR or LF line endings to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
403
def _cnonce():
    """Return a 16-hex-digit client nonce for Digest/HMACDigest auth."""
    # Seed is the current time plus 20 pseudo-random decimal digits.
    # NOTE(review): randrange(0, 9) excludes index 9, so the digit '9' is
    # never drawn -- harmless for a nonce but probably unintended.
    dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    return dig[:16]

def _wsse_username_token(cnonce, iso_now, password):
    """Return the WSSE PasswordDigest: Base64(SHA1(nonce + created + password))."""
    return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
410
411
412# For credentials we need two things, first
413# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
414# Then we also need a list of URIs that have already demanded authentication
415# That list is tricky since sub-URIs can take the same auth, or the
416# auth scheme may change as you descend the tree.
417# So we also need each Auth instance to be able to tell us
418# how close to the 'top' it is.
419
class Authentication(object):
    """Base class for the pluggable HTTP authentication handlers.

    An instance is bound to one (host, path) scope and one credential
    pair; the Http client asks it to decorate outgoing requests and to
    inspect responses for updated challenge state.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Number of path segments of 'request_uri' below this handler's scope."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """True when 'request_uri' on 'host' falls under this handler's scope."""
        # XXX Should we normalize the request_uri?
        path = parse_uri(request_uri)[2]
        return host == self.host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return TRUE if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
451
452
453
class BasicAuthentication(Authentication):
    """RFC 2617 Basic authentication: username:password, Base64-encoded."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic 'authorization' header to the outgoing request."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(userpass).strip()
462
463
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from the 401 response.

        Raises UnimplementedDigestAuthOptionError for any qop other than
        'auth' or any algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 per RFC 2617 section 3.2.2.2: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers.

        Computes the RFC 2617 request digest and sets the Authorization
        header; increments the nonce count ('nc') afterwards. 'cnonce' can
        be supplied for testing, otherwise a fresh one is generated.
        """
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        # A2 = method:digest-uri (qop 'auth' only -- no body hash).
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Track nonce updates from the server.

        Returns True (retry the request) when the server reports the nonce
        as stale; otherwise absorbs any 'nextnonce' from
        Authentication-Info and returns False.
        """
        if not response.has_key('authentication-info'):
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
519
520
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge and derive the signing key.

        Raises UnimplementedHmacDigestAuthOptionError for a missing server
        nonce or unsupported algorithm / pw-algorithm values.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # Key = H(username ":" H(password + salt) ":" realm), then hashed again.
        # NOTE(review): when _md5/_sha come from hashlib (Python >= 2.5) they
        # are constructors without a .new attribute, so pwhashmod.new() looks
        # like it would raise AttributeError here -- confirm against py2.4
        # where these were the md5/sha *modules*.
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers.

        Signs method, URI, client nonce, server nonce and the end-to-end
        header values with HMAC and sets the Authorization header.
        """
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        """Return True (retry) when the server reports an integrity or
        stale challenge; False otherwise."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
581
582
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE Authorization and X-WSSE headers to the request."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                digest,
                nonce,
                created)
606
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin authentication.

    Note: __init__ performs a network round trip to the ClientLogin
    endpoint to exchange the credentials for an Auth token.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # Response body is "key=value" lines; we need the 'Auth' token.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected; leave the token empty rather than raising.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
634
635
# Maps each supported auth scheme name (lowercased, as it appears in a
# WWW-Authenticate challenge) to its handler class.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes: strongest first.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
645
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """'cache' is the directory name; 'safe' maps a key to a filename."""
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached bytes for 'key', or None if missing/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # open() instead of the deprecated file() builtin; try/finally
            # guarantees the handle is closed even if read() raises.
            f = open(cacheFullPath, "rb")
            try:
                retval = f.read()
            finally:
                f.close()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store 'value' (bytes) under 'key', overwriting any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "wb")
        try:
            f.write(value)
        finally:
            f.close()

    def delete(self, key):
        """Remove the entry for 'key' if one exists; no-op otherwise."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
678
class Credentials(object):
    """A store of (name, password) pairs, optionally scoped to a domain."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register a name/password pair; domain "" matches every host."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every credential previously added."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to 'domain', in insertion order."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
693
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    # Reuses the (domain, name, password) storage and iter() logic verbatim;
    # only the meaning of the two payload fields changes.
    pass
698
699
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return every setting as a tuple in socksocket.setproxy() order."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when both a host and a port have been supplied."""
        return self.proxy_host is not None and self.proxy_port is not None
716
717
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """
    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                'Proxy support missing but proxy use was requested!')
        msg = "getaddrinfo returns an empty list"
        # Try each address returned by getaddrinfo until one connects.
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    # NOTE(review): TCP_NODELAY is only set on direct
                    # sockets, never on proxied ones -- confirm intended.
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)

                self.sock.connect(sa)
            except socket.error, msg:
                # This address failed; clean up and try the next one.
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # All addresses failed: re-raise the last socket error seen.
        if not self.sock:
            raise socket.error, msg
768
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    """
    This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """
    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                cert_file=cert_file, strict=strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # NOTE(review): unlike HTTPConnectionWithTimeout.connect, there is
        # no ProxiesUnavailableError guard here when socks is None.
        msg = "getaddrinfo returns an empty list"
        # Try each address returned by getaddrinfo until one connects.
        for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo(
            self.host, self.port, 0, socket.SOCK_STREAM):
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    sock = socks.socksocket(family, socktype, proto)
                    sock.setproxy(*self.proxy_info.astuple())
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))
                # Wrap in SSL only after the TCP connection succeeds.
                self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file)
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                # NOTE(review): on failure only self.sock (often None) is
                # closed; the partially-connected local 'sock' is leaked.
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # All addresses failed: re-raise the last socket error seen.
        if not self.sock:
            raise socket.error, msg
Joe Gregorio845a5452010-09-08 13:50:34 -0400815
816
817
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """
    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """
        The value of proxy_info is a ProxyInfo instance.

        If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout
        """
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects accumulated from successful challenges
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, cached ETags are not sent as cache validators.
        self.ignore_etag = False

        # If True, exceptions during request() are converted into synthetic
        # 4xx/5xx responses instead of propagating (see request()).
        self.force_exception_to_status_code = False

        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests, one per matching credential
        and supported challenge scheme (tried in AUTH_SCHEME_ORDER).
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue the request on 'conn', retrying once on a dropped/stale
        # connection. Returns (Response, content-string); the body is
        # decompressed for everything except HEAD.
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error, e:
                err = 0
                if hasattr(e, 'args'):
                    err = getattr(e, 'args')[0]
                else:
                    err = e.errno
                if err == errno.ECONNREFUSED: # Connection refused
                    raise
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if conn.sock is None:
                    if i == 0:
                        # First attempt on a dead socket: reconnect and retry.
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                pass
            try:
                response = conn.getresponse()
            except (socket.error, httplib.HTTPException):
                if i == 0:
                    # The connection may have gone stale; retry once.
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = ""
                if method == "HEAD":
                    # HEAD has no body; just release the connection.
                    response.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)
            break
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-scoped) stored authorization, if any.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Give the auth object a chance to react (e.g. a stale Digest
            # nonce); if it asks, re-send the request once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one succeeds.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    # Remember the working authorization for future requests.
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        # 303 is always followed; other redirects only for safe methods
        # unless follow_all_redirects is set.
        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Cache the permanent redirect so future requests can
                        # skip straight to the new URL.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Don't carry conditional-request validators through the redirect.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        # Per RFC 2616, a 303 response is fetched with GET.
                        redirect_method = method
                        if response.status == 303:
                            redirect_method = "GET"
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        # Keep the redirect chain reachable via .previous.
                        response.previous = old_response
                else:
                    raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
        elif response.status in [200, 203] and method == "GET":
            # Don't cache 206's since we aren't going to handle byte range requests
            if not response.has_key('content-location'):
                response['content-location'] = absolute_uri
            _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        # Thin wrapper over the module-level helper so subclasses can override.
        return _normalize_headers(headers)

# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST,
        DELETE, etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is
        a string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            # Treat http://host:443/ as https://host/.
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            # Reuse one connection per scheme+authority.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    # Use the first registered client certificate for this authority.
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            # Advertise compression unless the caller set its own preference
            # or is making a range request (ranges over compressed bodies
            # don't compose).
            if 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                    try:
                        # Cached entries are stored as "headers\r\n\r\nbody".
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        # Corrupt cache entry; drop it and fall through to
                        # an uncached request.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary = info['vary']
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    # The cached entry records each varied request header
                    # under a '-varied-<name>' key.
                    key = '-varied-%s' % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            # only-if-cached with no entry: synthesize a 504.
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        # Add cache validators so the server may answer 304.
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                    if response.status == 304 and method == "GET":
                        # Rewrite the cache entry with the new end-to-end headers
                        # Take all headers that are in response
                        # and overwrite their values in info.
                        # unless they are hop-by-hop, or are listed in the connection header.

                        for key in _get_end2end_headers(response):
                            info[key] = response[key]
                        merged_response = Response(info)
                        if hasattr(response, "_stale_digest"):
                            merged_response._stale_digest = response._stale_digest
                        _updateCache(headers, merged_response, content, self.cache, cachekey)
                        response = merged_response
                        # Callers see the cached body with a 200, not the 304.
                        response.status = 200
                        response.fromcache = True

                    elif response.status == 200:
                        content = new_content
                    else:
                        # Error response: the cached entry is no longer valid.
                        self.cache.delete(cachekey)
                        content = new_content
            else:
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    # RFC 2616 14.9.4: nothing cached, so answer 504.
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic Response instead of
                # letting the exception propagate to the caller.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)
1231
1232
1233
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse.

    Behaves as a dict of lower-cased header names to values, with a few
    extra attributes describing the response itself.
    """

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    # The previous Response in a redirect chain, or None.
    previous = None

    def __init__(self, info):
        # info is either an email.Message or
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            for key, value in info.getheaders():
                # Header names are normalized to lower case for lookup.
                self[key.lower()] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            # e.g. headers parsed back out of a cache entry.
            for key, value in info.items():
                self[key] = value
            self.status = int(self['status'])
        else:
            # Any other mapping (e.g. a plain dict of headers).
            for key, value in info.iteritems():
                self[key] = value
            self.status = int(self.get('status', self.status))


    def __getattr__(self, name):
        # Backwards compatibility: expose the headers as a '.dict' attribute.
        if name == 'dict':
            return self
        else:
            raise AttributeError, name