blob: 6e5973b1632c607f346e813e343a2ab5c631f0f1 [file] [log] [blame]
jcgregorioa46fe4e2006-11-16 04:13:45 +00001from __future__ import generators
jcgregorio2d66d4f2006-02-07 05:34:14 +00002"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
jcgregorio8421f272006-02-14 18:19:51 +00008Requires Python 2.3 or later
9
jcgregorio2d66d4f2006-02-07 05:34:14 +000010"""
11
12__author__ = "Joe Gregorio (joe@bitworking.org)"
13__copyright__ = "Copyright 2006, Joe Gregorio"
jcgregorioa0713ab2006-07-01 05:21:34 +000014__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
15 "James Antill",
jcgregorio92088922006-07-01 05:53:21 +000016 "Xavier Verges Farrero",
17 "Jonathan Feinberg",
jcgregoriocd1c27d2006-11-16 04:49:30 +000018 "Blair Zajac",
jcgregorio093bae62007-01-18 15:24:52 +000019 "Sam Ruby",
20 "Louis Nyffenegger"]
jcgregorio2d66d4f2006-02-07 05:34:14 +000021__license__ = "MIT"
jcgregorio8421f272006-02-14 18:19:51 +000022__version__ = "$Rev$"
jcgregorio2d66d4f2006-02-07 05:34:14 +000023
24import re
jcgregoriodebceec2006-12-12 20:26:02 +000025import sys
jcgregorio2d66d4f2006-02-07 05:34:14 +000026import md5
jcgregorio11eb4f12006-11-17 14:59:26 +000027import email
28import email.Utils
29import email.Message
jcgregorio2d66d4f2006-02-07 05:34:14 +000030import StringIO
31import gzip
32import zlib
33import httplib
34import urlparse
35import base64
36import os
37import copy
38import calendar
39import time
40import random
41import sha
jcgregorio22a9e162006-03-22 14:45:46 +000042import hmac
jcgregorio2d66d4f2006-02-07 05:34:14 +000043from gettext import gettext as _
jcgregorio07a9a4a2007-03-08 21:18:39 +000044import socket
jcgregoriodebceec2006-12-12 20:26:02 +000045
# The real iri2uri is only imported on Python 2.3 or later; older
# interpreters get a pass-through so callers can always call iri2uri().
if sys.version_info < (2,3):
    def iri2uri(uri):
        """Fallback used on old interpreters: return `uri` unchanged."""
        return uri
else:
    from iri2uri import iri2uri
jcgregorio2d66d4f2006-02-07 05:34:14 +000051
jcgregorio900e05d2006-04-02 03:21:39 +000052__all__ = ['Http', 'Response', 'HttpLib2Error',
53 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
jcgregorio076f54d2006-07-03 17:34:16 +000054 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
55 'debuglevel']
jcgregorio900e05d2006-04-02 03:21:39 +000056
57
jcgregoriod62c5d22006-03-18 04:39:31 +000058# The httplib debug level, set to a non-zero value to get debug output
59debuglevel = 0
60
# Python 2.3 support: provide sorted() when the interpreter is older than 2.4.
if sys.version_info < (2,4):
    def sorted(seq):
        """Return a new sorted list built from the items of `seq`.

        The input is copied first so that, like the builtin of the same
        name, the caller's sequence is left untouched and any iterable
        (not only lists) is accepted.
        """
        result = list(seq)
        result.sort()
        return result
66
# Python 2.3 support: supply HTTPResponse.getheaders() when httplib lacks it.
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples.

    Raises httplib.ResponseNotReady when no message has been read yet
    (self.msg is None).
    """
    message = self.msg
    if message is not None:
        return message.items()
    raise httplib.ResponseNotReady()

# Only patch the class when the method is genuinely missing.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
76
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    """Root of this module's exception hierarchy."""


# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An error that also carries a response and its content.

    desc     -- human readable description (becomes the exception message)
    response -- stored on the instance as .response
    content  -- stored on the instance as .content
    """
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content


# Errors that keep hold of the response/content they were raised with.
class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    pass

class RedirectLimit(HttpLib2ErrorWithResponse):
    pass

class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    pass

class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass

class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass


# Errors that carry only a message.
class RelativeURIError(HttpLib2Error):
    pass

class ServerNotFoundError(HttpLib2Error):
    pass
jcgregorio2d66d4f2006-02-07 05:34:14 +000096
97# Open Items:
98# -----------
99# Proxy support
100
101# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
102
103# Pluggable cache storage (supports storing the cache in
104# flat files by default. We need a plug-in architecture
105# that can support Berkeley DB and Squid)
106
107# == Known Issues ==
108# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
109# Does not handle Cache-Control: max-stale
110# Does not use Age: headers when calculating cache freshness.
111
112
113# The number of redirections to follow before giving up.
114# Note that only GET redirects are automatically followed.
115# Will also honor 301 requests by saving that info and never
116# requesting that URI again.
117DEFAULT_MAX_REDIRECTS = 5
118
119# Which headers are hop-by-hop headers by default
120HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
121
jcgregoriodb8dfc82006-03-31 14:59:46 +0000122def _get_end2end_headers(response):
jcgregorio6cb373b2006-04-03 13:51:00 +0000123 hopbyhop = list(HOP_BY_HOP)
jcgregoriodb8dfc82006-03-31 14:59:46 +0000124 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
125 return [header for header in response.keys() if header not in hopbyhop]
126
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Split a URI into its five components.

    Uses the regular expression from Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent from `uri` come back as None (the path is
    an empty string instead).
    """
    parts = URI.match(uri).groups()
    # Odd-numbered groups hold the bare component; the even ones include
    # the delimiter ("scheme:", "//authority", "?query", "#fragment").
    scheme, authority, path = parts[1], parts[3], parts[4]
    query, fragment = parts[6], parts[8]
    return (scheme, authority, path, query, fragment)
136
def urlnorm(uri):
    """Normalize an absolute URI.

    Returns (scheme, authority, request_uri, defrag_uri) where scheme and
    authority are lower-cased, request_uri is the path (defaulting to "/")
    plus any query string, and defrag_uri is the whole URI without its
    fragment.

    Raises RelativeURIError when `uri` has no scheme or no authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not (scheme and authority):
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    scheme = scheme.lower()
    authority = authority.lower()
    path = path or "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    if query:
        request_uri = "?".join([path, query])
    else:
        request_uri = path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
151
152
153# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
154re_url_scheme = re.compile(r'^\w+://')
155re_slash = re.compile(r'[?/:|]+')
156
157def safename(filename):
158 """Return a filename suitable for the cache.
159
160 Strips dangerous and common characters to create a filename we
161 can use to store the cache in.
162 """
163
164 try:
165 if re_url_scheme.match(filename):
166 if isinstance(filename,str):
jcgregorioa898f8f2006-12-12 17:16:55 +0000167 filename = filename.decode('utf-8')
168 filename = filename.encode('idna')
jcgregorioa46fe4e2006-11-16 04:13:45 +0000169 else:
jcgregorioa898f8f2006-12-12 17:16:55 +0000170 filename = filename.encode('idna')
jcgregorioa46fe4e2006-11-16 04:13:45 +0000171 except:
172 pass
173 if isinstance(filename,unicode):
174 filename=filename.encode('utf-8')
175 filemd5 = md5.new(filename).hexdigest()
176 filename = re_url_scheme.sub("", filename)
177 filename = re_slash.sub(",", filename)
178
179 # limit length of filename
180 if len(filename)>200:
181 filename=filename[:200]
182 return ",".join((filename, filemd5))
183
jcgregoriofd22e432006-04-27 02:00:08 +0000184NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
jcgregorio2d66d4f2006-02-07 05:34:14 +0000185def _normalize_headers(headers):
jcgregoriofd22e432006-04-27 02:00:08 +0000186 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
jcgregorio2d66d4f2006-02-07 05:34:14 +0000187
188def _parse_cache_control(headers):
189 retval = {}
190 if headers.has_key('cache-control'):
191 parts = headers['cache-control'].split(',')
192 parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
193 parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
194 retval = dict(parts_with_args + parts_wo_args)
195 return retval
196
jcgregoriofd22e432006-04-27 02:00:08 +0000197# Whether to use a strict mode to parse WWW-Authenticate headers
198# Might lead to bad results in case of ill-formed header value,
199# so disabled by default, falling back to relaxed parsing.
200# Set to true to turn on, usefull for testing servers.
201USE_WWW_AUTH_STRICT_PARSING = 0
202
203# In regex below:
204# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
205# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
206# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
207# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
208WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
209WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
210UNQUOTE_PAIRS = re.compile(r'\\(.)')
jcgregorio2d66d4f2006-02-07 05:34:14 +0000211def _parse_www_authenticate(headers, headername='www-authenticate'):
212 """Returns a dictionary of dictionaries, one dict
213 per auth_scheme."""
214 retval = {}
215 if headers.has_key(headername):
216 authenticate = headers[headername].strip()
jcgregoriofd22e432006-04-27 02:00:08 +0000217 www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
jcgregorio2d66d4f2006-02-07 05:34:14 +0000218 while authenticate:
219 # Break off the scheme at the beginning of the line
220 if headername == 'authentication-info':
221 (auth_scheme, the_rest) = ('digest', authenticate)
222 else:
223 (auth_scheme, the_rest) = authenticate.split(" ", 1)
224 # Now loop over all the key value pairs that come after the scheme,
225 # being careful not to roll into the next scheme
jcgregoriofd22e432006-04-27 02:00:08 +0000226 match = www_auth.search(the_rest)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000227 auth_params = {}
jcgregoriofd22e432006-04-27 02:00:08 +0000228 while match:
229 if match and len(match.groups()) == 3:
jcgregorio2d66d4f2006-02-07 05:34:14 +0000230 (key, value, the_rest) = match.groups()
jcgregoriofd22e432006-04-27 02:00:08 +0000231 auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
232 match = www_auth.search(the_rest)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000233 retval[auth_scheme.lower()] = auth_params
234 authenticate = the_rest.strip()
235 return retval
236
237
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of the strings "FRESH", "STALE" or "TRANSPARENT".

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Side effect: a request carrying 'Pragma: no-cache' but no
    Cache-Control header gets 'cache-control: no-cache' added to
    `request_headers`.
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        # parsedate_tz() returns None for values it cannot parse; without
        # a usable Date no age can be computed, so the entry stays STALE.
        date_tuple = email.Utils.parsedate_tz(response_headers['date'])
        if date_tuple is None:
            return retval
        date = calendar.timegm(date_tuple)
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if expires is None:
                # e.g. "Expires: 0" or "Expires: -1": already expired
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A max-age on the request overrides whatever the response allowed.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # Requiring N more seconds of freshness is the same as the
            # entry being N seconds older.
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
308
309def _decompressContent(response, new_content):
310 content = new_content
311 try:
jcgregorio90fb4a42006-11-17 16:19:47 +0000312 encoding = response.get('content-encoding', None)
313 if encoding in ['gzip', 'deflate']:
314 if encoding == 'gzip':
315 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
316 if encoding == 'deflate':
317 content = zlib.decompress(content)
jcgregorio153f5882006-11-06 03:33:24 +0000318 response['content-length'] = str(len(content))
jcgregorio90fb4a42006-11-17 16:19:47 +0000319 del response['content-encoding']
jcgregorio2d66d4f2006-02-07 05:34:14 +0000320 except:
321 content = ""
jcgregorio07a9a4a2007-03-08 21:18:39 +0000322 raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000323 return content
324
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store a response in `cache`, or evict it when storing is forbidden.

    Does nothing when `cachekey` is empty.  If either the request or the
    response says 'no-store', the entry for `cachekey` is deleted.
    Otherwise the entry written is a 'status:' line, the response
    headers (minus status, content-encoding and transfer-encoding) with
    CRLF line endings, and then `content`.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if 'no-store' in cc or 'no-store' in cc_response:
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # A 304 is recorded as 200 in the stored entry.
            status = response_headers.status
            if status == 304:
                status = 200

            # Write the normalized status computed above, not the raw one.
            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Turn every bare CR or bare LF into CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000349
350def _cnonce():
351 dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
352 return dig[:16]
353
354def _wsse_username_token(cnonce, iso_now, password):
355 return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
356
357
# For credentials we need two things. First,
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.).
# Then we also need a list of URIs that have already demanded authentication.
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
365
class Authentication:
    """Base class for the authentication schemes.

    Records the credentials, the host and the path component of
    `request_uri`; sub-classes add the scheme specific header handling.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Return how many path segments `request_uri` lies below self.path.

        Only the path component is examined, so slashes that appear in
        the query string or fragment do not count as extra levels.
        """
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return path[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """Return true if `request_uri` on `host` is covered by this
        object: same host, and a path that starts with self.path."""
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
397
398
399
class BasicAuthentication(Authentication):
    """HTTP Basic authentication: sends base64("name:password")."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        # encodestring() wraps its output with a newline every 76
        # characters; strip() alone would leave the inner ones in place and
        # put a line break inside the header for long credentials.
        token = base64.encodestring("%s:%s" % self.credentials).replace("\n", "")
        headers['authorization'] = 'Basic ' + token
408
409
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Read the Digest challenge from `response`.

        Raises UnimplementedDigestAuthOptionError when the server offers
        neither qop=auth nor algorithm=MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        # qop is optional in a challenge; treat a missing one as 'auth'.
        qop = self.challenge.get('qop', 'auth')
        # The value is a list of options ("auth,auth-int"); accept both
        # comma and whitespace as separators.
        offered = [x.strip() for x in qop.replace(',', ' ').split()]
        self.challenge['qop'] = ('auth' in offered) and 'auth' or None
        if self.challenge['qop'] is None:
            # These exceptions require the response and content as well.
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s.") % qop, response, content)
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for algorithm: %s.") % self.challenge['algorithm'], response, content)
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        H = lambda x: md5.new(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest  = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        # The nonce count goes up with every request made under a nonce.
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Pick up a new nonce from the server's reply.

        Returns True (retry the request) when the server flagged our
        nonce as stale; otherwise False.
        """
        if 'authentication-info' not in response:
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if 'nextnonce' in updated_challenge:
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
465
466
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Read the HMACDigest challenge from `response` and derive the key.

        Raises UnimplementedHmacDigestAuthOptionError when the challenge
        has no server nonce or names an unsupported algorithm.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        # These exceptions require the response and content as well, and
        # the message is translated before the value is substituted.
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError(_("The challenge doesn't contain a server nonce, or this one is empty."), response, content)
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError(_("Unsupported value for algorithm: %s.") % self.challenge['algorithm'], response, content)
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError(_("Unsupported value for pw-algorithm: %s.") % self.challenge['pw-algorithm'], response, content)
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5
        else:
            self.hashmod = sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5
        else:
            self.pwhashmod = sha
        # key = H( name ":" H(password + salt) ":" realm ), H = pw-algorithm
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        # The digest covers the values of the end-to-end headers.
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest  = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        """Return True (retry) when the server's new challenge gives
        'integrity' or 'stale' as its reason, False otherwise."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
527
528
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE Authorization header and the X-WSSE header
        holding a freshly computed UsernameToken."""
        name, password = self.credentials[0], self.credentials[1]
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, password)
        token_fields = (name, digest, nonce, created)
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % token_fields
552
class GoogleLoginAuthentication(Authentication):
    """GoogleLogin authentication: obtains an Auth token by POSTing the
    credentials to the ClientLogin endpoint, then sends it on requests."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and  request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # The reply is read as name=value lines; anything without "="
        # is skipped instead of breaking the parse.
        d = {}
        for line in content.split('\n'):
            if line.find("=") != -1:
                key, value = line.split("=", 1)
                d[key] = value
        if resp.status == 403:
            self.Auth = ""
        else:
            # A reply without an Auth entry is treated like a refusal.
            self.Auth = d.get('Auth', "")

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
580
jcgregorio2d66d4f2006-02-07 05:34:14 +0000581
# Maps a lower-cased authentication scheme name to the class implementing it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# The order in which the schemes above are tried.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
jcgregorio2d66d4f2006-02-07 05:34:14 +0000591
jcgregorioa46fe4e2006-11-16 04:13:45 +0000592def _md5(s):
593 return
jcgregorio2d66d4f2006-02-07 05:34:14 +0000594
class FileCache:
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """cache -- directory holding the entries (created if missing)
        safe  -- callable mapping a cache key to a file name
        """
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the stored value for `key`, or None if it cannot be read."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # Binary mode: entries contain raw response bodies that must
            # not go through newline translation.
            f = open(cacheFullPath, "rb")
            try:
                retval = f.read()
            finally:
                f.close()
        except (IOError, OSError):
            # A missing or unreadable entry is simply a cache miss.
            pass
        return retval

    def set(self, key, value):
        """Store `value` under `key`, replacing any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "wb")
        try:
            f.write(value)
        finally:
            f.close()

    def delete(self, key):
        """Remove the entry for `key` if there is one."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
627
class Credentials:
    """A pool of (name, password) pairs, each optionally tied to a domain."""
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register a pair; an empty `domain` makes it apply everywhere."""
        entry = (domain.lower(), name, password)
        self.credentials.append(entry)

    def clear(self):
        """Forget every registered pair."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) for each pair registered for `domain`
        or registered without a domain, in insertion order."""
        for entry in self.credentials:
            cdomain, name, password = entry
            if cdomain != "" and cdomain != domain:
                continue
            yield (name, password)
642
class KeyCerts(Credentials):
    """A Credentials collection whose name/password slots
    carry a key/cert pair instead; behaviour is otherwise inherited
    unchanged."""
647
jcgregorio2d66d4f2006-02-07 05:34:14 +0000648class Http:
jcgregorio900e05d2006-04-02 03:21:39 +0000649 """An HTTP client that handles all
jcgregorio2d66d4f2006-02-07 05:34:14 +0000650 methods, caching, ETags, compression,
jcgregorio900e05d2006-04-02 03:21:39 +0000651 HTTPS, Basic, Digest, WSSE, etc.
jcgregorio2d66d4f2006-02-07 05:34:14 +0000652 """
653 def __init__(self, cache=None):
654 # Map domain name to an httplib connection
655 self.connections = {}
656 # The location of the cache, for now a directory
657 # where cached responses are held.
jcgregorio36140b52006-06-13 02:17:52 +0000658 if cache and isinstance(cache, str):
659 self.cache = FileCache(cache)
660 else:
661 self.cache = cache
jcgregorio2d66d4f2006-02-07 05:34:14 +0000662
jcgregoriode8238d2007-03-07 19:08:26 +0000663 # Name/password
664 self.credentials = Credentials()
665
666 # Key/cert
667 self.certificates = KeyCerts()
jcgregorio2d66d4f2006-02-07 05:34:14 +0000668
669 # authorization objects
670 self.authorizations = []
671
jcgregorio0bf72922006-04-27 01:38:17 +0000672 self.follow_all_redirects = False
673
jcgregorio25185622006-10-28 05:12:34 +0000674 self.ignore_etag = False
675
jcgregorio07a9a4a2007-03-08 21:18:39 +0000676 self.force_exception_to_status_code = True
677
jcgregorio2d66d4f2006-02-07 05:34:14 +0000678 def _auth_from_challenge(self, host, request_uri, headers, response, content):
679 """A generator that creates Authorization objects
680 that can be applied to requests.
681 """
682 challenges = _parse_www_authenticate(response, 'www-authenticate')
jcgregoriode8238d2007-03-07 19:08:26 +0000683 for cred in self.credentials.iter(host):
jcgregorio2d66d4f2006-02-07 05:34:14 +0000684 for scheme in AUTH_SCHEME_ORDER:
685 if challenges.has_key(scheme):
jcgregorio6cbab7e2006-04-21 20:35:43 +0000686 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000687
jcgregoriode8238d2007-03-07 19:08:26 +0000688 def add_credentials(self, name, password, domain=""):
jcgregorio900e05d2006-04-02 03:21:39 +0000689 """Add a name and password that will be used
690 any time a request requires authentication."""
jcgregoriode8238d2007-03-07 19:08:26 +0000691 self.credentials.add(name, password, domain)
692
693 def add_certificate(self, key, cert, domain):
694 """Add a key and cert that will be used
695 any time a request requires authentication."""
696 self.certificates.add(key, cert, domain)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000697
698 def clear_credentials(self):
jcgregorio900e05d2006-04-02 03:21:39 +0000699 """Remove all the names and passwords
700 that are used for authentication"""
jcgregoriode8238d2007-03-07 19:08:26 +0000701 self.credentials.clear()
jcgregorio2d66d4f2006-02-07 05:34:14 +0000702 self.authorizations = []
703
704 def _conn_request(self, conn, request_uri, method, body, headers):
705 for i in range(2):
706 try:
707 conn.request(method, request_uri, body, headers)
708 response = conn.getresponse()
jcgregorio07a9a4a2007-03-08 21:18:39 +0000709 except socket.gaierror:
710 conn.close()
711 raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
712 except Exception, e:
jcgregorio8421f272006-02-14 18:19:51 +0000713 if i == 0:
jcgregorio2d66d4f2006-02-07 05:34:14 +0000714 conn.close()
715 conn.connect()
716 continue
jcgregorio8421f272006-02-14 18:19:51 +0000717 else:
718 raise
719 else:
720 content = response.read()
721 response = Response(response)
722 content = _decompressContent(response, content)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000723
724 break;
725 return (response, content)
726
727
jcgregorio36140b52006-06-13 02:17:52 +0000728 def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
jcgregorio2d66d4f2006-02-07 05:34:14 +0000729 """Do the actual request using the connection object
730 and also follow one level of redirects if necessary"""
731
732 auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
733 auth = auths and sorted(auths)[0][1] or None
734 if auth:
735 auth.request(method, request_uri, headers, body)
736
737 (response, content) = self._conn_request(conn, request_uri, method, body, headers)
738
739 if auth:
740 if auth.response(response, body):
741 auth.request(method, request_uri, headers, body)
742 (response, content) = self._conn_request(conn, request_uri, method, body, headers )
743 response._stale_digest = 1
744
745 if response.status == 401:
746 for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
747 authorization.request(method, request_uri, headers, body)
748 (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
749 if response.status != 401:
750 self.authorizations.append(authorization)
751 authorization.response(response, body)
752 break
753
jcgregorio0bf72922006-04-27 01:38:17 +0000754 if (self.follow_all_redirects or method in ["GET", "HEAD"]) or response.status == 303:
jcgregorio2d66d4f2006-02-07 05:34:14 +0000755 if response.status in [300, 301, 302, 303, 307]:
756 # Pick out the location header and basically start from the beginning
757 # remembering first to strip the ETag header and decrement our 'depth'
758 if redirections:
759 if not response.has_key('location') and response.status != 300:
jcgregorio07a9a4a2007-03-08 21:18:39 +0000760 raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
jcgregorio46075532006-11-04 22:18:55 +0000761 # Fix-up relative redirects (which violate an RFC 2616 MUST)
762 if response.has_key('location'):
763 location = response['location']
764 (scheme, authority, path, query, fragment) = parse_uri(location)
765 if authority == None:
766 response['location'] = urlparse.urljoin(absolute_uri, location)
jcgregorio0bf72922006-04-27 01:38:17 +0000767 if response.status == 301 and method in ["GET", "HEAD"]:
jcgregorio2d66d4f2006-02-07 05:34:14 +0000768 response['-x-permanent-redirect-url'] = response['location']
jcgregorio772adc82006-11-17 21:52:34 +0000769 if not response.has_key('content-location'):
770 response['content-location'] = absolute_uri
jcgregorio36140b52006-06-13 02:17:52 +0000771 _updateCache(headers, response, content, self.cache, cachekey)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000772 if headers.has_key('if-none-match'):
773 del headers['if-none-match']
774 if headers.has_key('if-modified-since'):
775 del headers['if-modified-since']
776 if response.has_key('location'):
jcgregorio2d66d4f2006-02-07 05:34:14 +0000777 location = response['location']
jcgregorio46075532006-11-04 22:18:55 +0000778 old_response = copy.deepcopy(response)
jcgregorio772adc82006-11-17 21:52:34 +0000779 if not old_response.has_key('content-location'):
780 old_response['content-location'] = absolute_uri
jcgregorio2d66d4f2006-02-07 05:34:14 +0000781 redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
jcgregorio0bf72922006-04-27 01:38:17 +0000782 (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
jcgregorioa0713ab2006-07-01 05:21:34 +0000783 response.previous = old_response
jcgregorio2d66d4f2006-02-07 05:34:14 +0000784 else:
jcgregorio07a9a4a2007-03-08 21:18:39 +0000785 raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000786 elif response.status in [200, 203] and method == "GET":
787 # Don't cache 206's since we aren't going to handle byte range requests
jcgregorio772adc82006-11-17 21:52:34 +0000788 if not response.has_key('content-location'):
789 response['content-location'] = absolute_uri
jcgregorio36140b52006-06-13 02:17:52 +0000790 _updateCache(headers, response, content, self.cache, cachekey)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000791
792 return (response, content)
793
794 def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS):
jcgregorio076f54d2006-07-03 17:34:16 +0000795 """ Performs a single HTTP request.
796The 'uri' is the URI of the HTTP resource and can begin
797with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
jcgregorio2d66d4f2006-02-07 05:34:14 +0000798
jcgregorio076f54d2006-07-03 17:34:16 +0000799The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
800There is no restriction on the methods allowed.
801
802The 'body' is the entity body to be sent with the request. It is a string
803object.
804
805Any extra headers that are to be sent with the request should be provided in the
806'headers' dictionary.
807
808The maximum number of redirect to follow before raising an
809exception is 'redirections. The default is 5.
810
811The return value is a tuple of (response, content), the first
812being and instance of the 'Response' class, the second being
813a string that contains the response entity body.
jcgregorio2d66d4f2006-02-07 05:34:14 +0000814 """
jcgregorio07a9a4a2007-03-08 21:18:39 +0000815 try:
816 if headers is None:
817 headers = {}
jcgregoriode8238d2007-03-07 19:08:26 +0000818 else:
jcgregorio07a9a4a2007-03-08 21:18:39 +0000819 headers = _normalize_headers(headers)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000820
jcgregorio07a9a4a2007-03-08 21:18:39 +0000821 if not headers.has_key('user-agent'):
822 headers['user-agent'] = "Python-httplib2/%s" % __version__
jcgregorio2d66d4f2006-02-07 05:34:14 +0000823
jcgregorio07a9a4a2007-03-08 21:18:39 +0000824 uri = iri2uri(uri)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000825
jcgregorio07a9a4a2007-03-08 21:18:39 +0000826 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
jcgregorio2d66d4f2006-02-07 05:34:14 +0000827
jcgregorio07a9a4a2007-03-08 21:18:39 +0000828 conn_key = scheme+":"+authority
829 if conn_key in self.connections:
830 conn = self.connections[conn_key]
jcgregorio2d66d4f2006-02-07 05:34:14 +0000831 else:
jcgregorio07a9a4a2007-03-08 21:18:39 +0000832 connection_type = (scheme == 'https') and httplib.HTTPSConnection or httplib.HTTPConnection
833 certs = list(self.certificates.iter(authority))
834 if scheme == 'https' and certs:
835 conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], cert_file=certs[0][1])
836 else:
837 conn = self.connections[conn_key] = connection_type(authority)
838 conn.set_debuglevel(debuglevel)
jcgregorioe4ce13e2006-04-02 03:05:08 +0000839
jcgregorio07a9a4a2007-03-08 21:18:39 +0000840 if method in ["GET", "HEAD"] and 'range' not in headers:
841 headers['accept-encoding'] = 'compress, gzip'
jcgregorio82d99d72006-05-17 19:18:03 +0000842
jcgregorio07a9a4a2007-03-08 21:18:39 +0000843 info = email.Message.Message()
844 cached_value = None
845 if self.cache:
846 cachekey = defrag_uri
847 cached_value = self.cache.get(cachekey)
848 if cached_value:
849 try:
850 info = email.message_from_string(cached_value)
851 content = cached_value.split('\r\n\r\n', 1)[1]
852 except Exception, e:
853 self.cache.delete(cachekey)
854 cachekey = None
855 cached_value = None
jcgregorio2d66d4f2006-02-07 05:34:14 +0000856 else:
jcgregorio07a9a4a2007-03-08 21:18:39 +0000857 cachekey = None
858
859 if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
860 # http://www.w3.org/1999/04/Editing/
861 headers['if-match'] = info['etag']
862
863 if method not in ["GET", "HEAD"] and self.cache and cachekey:
864 # RFC 2616 Section 13.10
jcgregorio36140b52006-06-13 02:17:52 +0000865 self.cache.delete(cachekey)
jcgregorio07a9a4a2007-03-08 21:18:39 +0000866
867 if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
868 if info.has_key('-x-permanent-redirect-url'):
869 # Should cached permanent redirects be counted in our redirection count? For now, yes.
870 (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
871 response.previous = Response(info)
872 response.previous.fromcache = True
873 else:
874 # Determine our course of action:
875 # Is the cached entry fresh or stale?
876 # Has the client requested a non-cached response?
877 #
878 # There seems to be three possible answers:
879 # 1. [FRESH] Return the cache entry w/o doing a GET
880 # 2. [STALE] Do the GET (but add in cache validators if available)
881 # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
882 entry_disposition = _entry_disposition(info, headers)
883
884 if entry_disposition == "FRESH":
885 if not cached_value:
886 info['status'] = '504'
887 content = ""
888 response = Response(info)
889 if cached_value:
890 response.fromcache = True
891 return (response, content)
892
893 if entry_disposition == "STALE":
894 if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
895 headers['if-none-match'] = info['etag']
896 if info.has_key('last-modified') and not 'last-modified' in headers:
897 headers['if-modified-since'] = info['last-modified']
898 elif entry_disposition == "TRANSPARENT":
899 pass
900
901 (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
902
903 if response.status == 304 and method == "GET":
904 # Rewrite the cache entry with the new end-to-end headers
905 # Take all headers that are in response
906 # and overwrite their values in info.
907 # unless they are hop-by-hop, or are listed in the connection header.
908
909 for key in _get_end2end_headers(response):
910 info[key] = response[key]
911 merged_response = Response(info)
912 if hasattr(response, "_stale_digest"):
913 merged_response._stale_digest = response._stale_digest
914 _updateCache(headers, merged_response, content, self.cache, cachekey)
915 response = merged_response
916 response.status = 200
917 response.fromcache = True
918
919 elif response.status == 200:
920 content = new_content
921 else:
922 self.cache.delete(cachekey)
923 content = new_content
924 else:
925 (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
926 except Exception, e:
927 if self.force_exception_to_status_code:
928 if isinstance(e, HttpLib2ErrorWithResponse):
929 response = e.response
930 content = e.content
931 response.status = 500
932 response.reason = str(e)
933 elif isinstance(e, socket.timeout):
934 content = "Request Timeout"
935 response = Response( {
936 "content-type": "text/plain",
937 "status": "408",
938 "content-length": len(content)
939 })
940 response.reason = "Request Timeout"
941 else:
942 content = str(e)
943 response = Response( {
944 "content-type": "text/plain",
945 "status": "400",
946 "content-length": len(content)
947 })
948 response.reason = "Bad Request"
949 else:
950 raise
951
952
jcgregorio2d66d4f2006-02-07 05:34:14 +0000953 return (response, content)
954
955
956
957class Response(dict):
jcgregorio11eb4f12006-11-17 14:59:26 +0000958 """An object more like email.Message than httplib.HTTPResponse."""
jcgregorio2d66d4f2006-02-07 05:34:14 +0000959
960 """Is this response from our local cache"""
961 fromcache = False
962
963 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
964 version = 11
965
966 "Status code returned by server. "
967 status = 200
968
jcgregorio2d66d4f2006-02-07 05:34:14 +0000969 """Reason phrase returned by server."""
jcgregorio36140b52006-06-13 02:17:52 +0000970 reason = "Ok"
jcgregorio2d66d4f2006-02-07 05:34:14 +0000971
jcgregorioa0713ab2006-07-01 05:21:34 +0000972 previous = None
jcgregorio2d66d4f2006-02-07 05:34:14 +0000973
974 def __init__(self, info):
jcgregorio11eb4f12006-11-17 14:59:26 +0000975 # info is either an email.Message or
jcgregorio2d66d4f2006-02-07 05:34:14 +0000976 # an httplib.HTTPResponse object.
977 if isinstance(info, httplib.HTTPResponse):
jcgregorioa0713ab2006-07-01 05:21:34 +0000978 for key, value in info.getheaders():
jcgregorio2d66d4f2006-02-07 05:34:14 +0000979 self[key] = value
980 self.status = info.status
981 self['status'] = str(self.status)
982 self.reason = info.reason
983 self.version = info.version
jcgregorio11eb4f12006-11-17 14:59:26 +0000984 elif isinstance(info, email.Message.Message):
jcgregorioa0713ab2006-07-01 05:21:34 +0000985 for key, value in info.items():
jcgregorio2d66d4f2006-02-07 05:34:14 +0000986 self[key] = value
987 self.status = int(self['status'])
jcgregorio07a9a4a2007-03-08 21:18:39 +0000988 else:
989 for key, value in info.iteritems():
990 self[key] = value
991 self.status = int(self.get('status', self.status))
992
jcgregorio2d66d4f2006-02-07 05:34:14 +0000993
jcgregorio153f5882006-11-06 03:33:24 +0000994 def __getattr__(self, name):
995 if name == 'dict':
996 return self
997 else:
998 raise AttributeError, name
999
jcgregorio2d66d4f2006-02-07 05:34:14 +00001000