blob: 64f2e17e7b5520bf4c94eac581588d3909a3d680 [file] [log] [blame]
Joe Gregorio845a5452010-09-08 13:50:34 -04001from __future__ import generators
2"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
8Requires Python 2.3 or later
9
10Changelog:
112007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
12
13"""
14
15__author__ = "Joe Gregorio (joe@bitworking.org)"
16__copyright__ = "Copyright 2006, Joe Gregorio"
17__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
18 "James Antill",
19 "Xavier Verges Farrero",
20 "Jonathan Feinberg",
21 "Blair Zajac",
22 "Sam Ruby",
23 "Louis Nyffenegger"]
24__license__ = "MIT"
25__version__ = "$Rev$"
26
27import re
28import sys
29import email
30import email.Utils
31import email.Message
32import email.FeedParser
33import StringIO
34import gzip
35import zlib
36import httplib
37import urlparse
38import base64
39import os
40import copy
41import calendar
42import time
43import random
44import errno
# remove deprecated warning in python2.6
46try:
47 from hashlib import sha1 as _sha, md5 as _md5
48except ImportError:
49 import sha
50 import md5
51 _sha = sha.new
52 _md5 = md5.new
53import hmac
54from gettext import gettext as _
55import socket
56
57try:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040058 from httplib2 import socks
Joe Gregorio845a5452010-09-08 13:50:34 -040059except ImportError:
Joe Gregorio5e3a5fa2010-10-11 13:03:56 -040060 socks = None
Joe Gregorio845a5452010-09-08 13:50:34 -040061
# Build the appropriate socket wrapper for ssl
try:
    import ssl # python 2.6
    _ssl_wrap_socket = ssl.wrap_socket
except (AttributeError, ImportError):
    # Pre-2.6 fallback: no 'ssl' module, so wrap the socket with the
    # legacy socket.ssl()/httplib.FakeSocket pair instead.
    def _ssl_wrap_socket(sock, key_file, cert_file):
        """Wrap 'sock' in SSL using the pre-2.6 API; returns a file-like socket."""
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
70
71
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    # Python 2.2 and earlier: no real IRI support, pass URIs through untouched.
    def iri2uri(uri):
        return uri
77
def has_timeout(timeout): # python 2.6
    """Return True when 'timeout' is a real timeout value.

    None and the Python 2.6 module-default sentinel
    (socket._GLOBAL_DEFAULT_TIMEOUT) both mean "no explicit timeout".
    """
    if timeout is None:
        return False
    # The sentinel only exists on Python >= 2.6; on older versions any
    # non-None value counts as a timeout.
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    return timeout is not sentinel
82
# Public API of the module; star-imports pick up exactly these names.
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel', 'ProxiesUnavailableError']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0
91
92
# Python 2.3 support
if sys.version_info < (2,4):
    def sorted(seq):
        """Minimal stand-in for the sorted() builtin added in Python 2.4.

        Copies the input first so the caller's sequence is not mutated and
        arbitrary iterables are accepted, matching the real builtin.
        """
        seq = list(seq)
        seq.sort()
        return seq
98
# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples.

    Raises httplib.ResponseNotReady if the response headers have not
    been read yet.
    """
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

# httplib.HTTPResponse only gained getheaders() in Python 2.4; patch it in
# on older interpreters so the rest of this module can rely on it.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
108
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    """Base class for every exception this module raises."""
    pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An error that also carries the response and body that caused it,
    so a caller may choose to turn it back into a (response, content) pair."""
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content
119
# 3xx response arrived without a Location header to follow.
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
# Followed more redirects than the configured maximum.
class RedirectLimit(HttpLib2ErrorWithResponse): pass
# Body claimed gzip/deflate encoding but could not be decoded.
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
# Server requested a Digest option this client does not implement.
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
# Server requested an HMACDigest option this client does not implement.
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

# A header could not be parsed (e.g. bad WWW-Authenticate).
class MalformedHeader(HttpLib2Error): pass
# Caller passed a relative URI where an absolute one is required.
class RelativeURIError(HttpLib2Error): pass
# DNS resolution of the target host failed.
class ServerNotFoundError(HttpLib2Error): pass
# Proxy use requested but the socks module is not available.
class ProxiesUnavailableError(HttpLib2Error): pass
Joe Gregorio845a5452010-09-08 13:50:34 -0400130
131# Open Items:
132# -----------
133# Proxy support
134
135# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
136
137# Pluggable cache storage (supports storing the cache in
138# flat files by default. We need a plug-in architecture
139# that can support Berkeley DB and Squid)
140
141# == Known Issues ==
142# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
143# Does not handle Cache-Control: max-stale
144# Does not use Age: headers when calculating cache freshness.
145
146
# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5
152
153# Which headers are hop-by-hop headers by default
154HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
155
156def _get_end2end_headers(response):
157 hopbyhop = list(HOP_BY_HOP)
158 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
159 return [header for header in response.keys() if header not in hopbyhop]
160
# Appendix B of RFC 3986: one regex that splits any URI reference.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None (path as '').
    """
    # Groups 1/3/6/8 are the delimiter-bearing wrappers; the payloads are
    # at indices 1, 3, 4, 6 and 8 of groups().
    (_, scheme, _, authority, path, _, query, _, fragment) = URI.match(uri).groups()
    return (scheme, authority, path, query, fragment)
170
def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri) where scheme and
    authority are lowercased, an empty path becomes "/", and defrag_uri is
    the URI with any fragment removed.

    Raises RelativeURIError if the URI has no scheme or no authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    # Lowercase the scheme exactly once (the original code did this twice).
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
185
186
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.

    The result is "<sanitized-uri>,<md5-of-full-uri>", with the sanitized
    part capped at 200 characters so the digest keeps keys unique.
    """

    try:
        if re_url_scheme.match(filename):
            # NOTE(review): IDNA-encodes the *entire* URL, not just the host
            # part; a non-ASCII path makes encode('idna') raise UnicodeError,
            # which is silently swallowed below -- confirm intended.
            if isinstance(filename,str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    # Digest is taken over the full (scheme-bearing) URL, before stripping.
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename)>200:
        filename=filename[:200]
    return ",".join((filename, filemd5))
217
218NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
219def _normalize_headers(headers):
220 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
221
222def _parse_cache_control(headers):
223 retval = {}
224 if headers.has_key('cache-control'):
225 parts = headers['cache-control'].split(',')
226 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
227 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
228 retval = dict(parts_with_args + parts_wo_args)
229 return retval
230
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, usefull for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme.

    E.g. {'digest': {'realm': '...', 'nonce': '...'}, 'basic': {...}}.
    Raises MalformedHeader when the header value cannot be split into
    scheme + parameters.
    """
    retval = {}
    if headers.has_key(headername):
        try:
            authenticate = headers[headername].strip()
            www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
            while authenticate:
                # Break off the scheme at the beginning of the line
                if headername == 'authentication-info':
                    # Authentication-Info carries no scheme name; it is
                    # implicitly Digest (RFC 2617 section 3.2.3).
                    (auth_scheme, the_rest) = ('digest', authenticate)
                else:
                    (auth_scheme, the_rest) = authenticate.split(" ", 1)
                # Now loop over all the key value pairs that come after the scheme,
                # being careful not to roll into the next scheme
                match = www_auth.search(the_rest)
                auth_params = {}
                while match:
                    if match and len(match.groups()) == 3:
                        (key, value, the_rest) = match.groups()
                        # Undo quoted-pair escaping ('\x' -> 'x') in values.
                        auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                    match = www_auth.search(the_rest)
                retval[auth_scheme.lower()] = auth_params
                authenticate = the_rest.strip()
        except ValueError:
            # authenticate.split(" ", 1) failed: no parameters after scheme.
            raise MalformedHeader("WWW-Authenticate")
    return retval
273
274
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of "FRESH" (cached copy may be served), "STALE" (must be
    revalidated or refetched) or "TRANSPARENT" (bypass the cache).

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # HTTP/1.0-style Pragma: no-cache bypasses the cache; upgrade it to the
    # HTTP/1.1 equivalent on the outgoing request.
    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # Freshness lifetime: response max-age wins over Expires; absent
        # both it is 0. A request-side max-age overrides either.
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            # min-fresh: client wants the entry to remain fresh for at
            # least this many more seconds, so age it forward first.
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
348
def _decompressContent(response, new_content):
    """Decompress a gzip- or deflate-encoded response body.

    On success returns the decoded bytes, fixes up 'content-length', and
    renames 'content-encoding' to '-content-encoding' so later processing
    is not confused. Returns the body unchanged for other encodings.

    Raises FailedToDecompressContent when the body claims an encoding it
    does not actually have.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            # Record the historical presence of the encoding in a way the won't interfere.
            response['-content-encoding'] = response['content-encoding']
            del response['content-encoding']
    except (IOError, zlib.error):
        # Bug fix: corrupt deflate data raises zlib.error, not IOError, so
        # the original 'except IOError' let it escape instead of converting
        # it into FailedToDecompressContent like gzip failures.
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content
366
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) the response under 'cachekey' in 'cache'.

    Honors no-store on either side by deleting any existing entry.
    Otherwise serializes "status line + headers + body" as a single cache
    value, annotating which request headers the response varied on.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            # Hop-by-hop/bookkeeping headers are not part of the cached entity.
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # NOTE(review): response_headers is expected to be a Response
            # object here (attribute access), not a plain dict.
            status = response_headers.status
            if status == 304:
                # A 304 means the cached entity is still valid, so store it
                # as a normal 200 for future cache hits.
                status = 200

            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize bare CR or LF line endings to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
403
def _cnonce():
    """Return a 16-hex-digit client nonce for Digest/HMACDigest auth."""
    # Seed is the current time plus 20 pseudo-random decimal digits.
    # NOTE(review): randrange(0, 9) excludes index 9, so the digit '9' is
    # never drawn -- harmless for a nonce but probably unintended.
    dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    return dig[:16]

def _wsse_username_token(cnonce, iso_now, password):
    """Return the WSSE PasswordDigest: Base64(SHA1(nonce + created + password))."""
    return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
410
411
412# For credentials we need two things, first
413# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
414# Then we also need a list of URIs that have already demanded authentication
415# That list is tricky since sub-URIs can take the same auth, or the
416# auth scheme may change as you descend the tree.
417# So we also need each Auth instance to be able to tell us
418# how close to the 'top' it is.
419
class Authentication(object):
    """Base class for the pluggable HTTP authentication handlers.

    An instance is bound to one (host, path) scope and one credential
    pair; the Http client asks it to decorate outgoing requests and to
    inspect responses for updated challenge state.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Number of path segments of 'request_uri' below this handler's scope."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """True when 'request_uri' on 'host' falls under this handler's scope."""
        # XXX Should we normalize the request_uri?
        path = parse_uri(request_uri)[2]
        return host == self.host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return TRUE if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
451
452
453
class BasicAuthentication(Authentication):
    """RFC 2617 Basic authentication: username:password, Base64-encoded."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic 'authorization' header to the outgoing request."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(userpass).strip()
462
463
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from the 401 response.

        Raises UnimplementedDigestAuthOptionError for any qop other than
        'auth' or any algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 per RFC 2617 section 3.2.2.2: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers.

        Computes the RFC 2617 request digest and sets the Authorization
        header; increments the nonce count ('nc') afterwards. 'cnonce' can
        be supplied for testing, otherwise a fresh one is generated.
        """
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        # A2 = method:digest-uri (qop 'auth' only -- no body hash).
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Track nonce updates from the server.

        Returns True (retry the request) when the server reports the nonce
        as stale; otherwise absorbs any 'nextnonce' from
        Authentication-Info and returns False.
        """
        if not response.has_key('authentication-info'):
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
519
520
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge and derive the signing key.

        Raises UnimplementedHmacDigestAuthOptionError for a missing server
        nonce or unsupported algorithm / pw-algorithm values.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # Key = H(username ":" H(password + salt) ":" realm), then hashed again.
        # NOTE(review): when _md5/_sha come from hashlib (Python >= 2.5) they
        # are constructors without a .new attribute, so pwhashmod.new() looks
        # like it would raise AttributeError here -- confirm against py2.4
        # where these were the md5/sha *modules*.
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers.

        Signs method, URI, client nonce, server nonce and the end-to-end
        header values with HMAC and sets the Authorization header.
        """
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        """Return True (retry) when the server reports an integrity or
        stale challenge; False otherwise."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
581
582
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE Authorization and X-WSSE headers to the request."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                digest,
                nonce,
                created)
606
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin authentication.

    Note: __init__ performs a network round trip to the ClientLogin
    endpoint to exchange the credentials for an Auth token.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # Response body is "key=value" lines; we need the 'Auth' token.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected; leave the token empty rather than raising.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
634
635
# Maps each supported auth scheme name (lowercased, as it appears in a
# WWW-Authenticate challenge) to its handler class.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes: strongest first.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
645
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """'cache' is the directory name; 'safe' maps a key to a filename."""
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached bytes for 'key', or None if missing/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # open() instead of the deprecated file() builtin; try/finally
            # guarantees the handle is closed even if read() raises.
            f = open(cacheFullPath, "rb")
            try:
                retval = f.read()
            finally:
                f.close()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store 'value' (bytes) under 'key', overwriting any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "wb")
        try:
            f.write(value)
        finally:
            f.close()

    def delete(self, key):
        """Remove the entry for 'key' if one exists; no-op otherwise."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
678
class Credentials(object):
    """A store of (name, password) pairs, optionally scoped to a domain."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register a name/password pair; domain "" matches every host."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every credential previously added."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to 'domain', in insertion order."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
693
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    # Reuses the (domain, name, password) storage and iter() logic verbatim;
    # only the meaning of the two payload fields changes.
    pass
698
699
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return every setting as a tuple in socksocket.setproxy() order."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when both a host and a port have been supplied."""
        return self.proxy_host is not None and self.proxy_port is not None
716
717
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """
    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                'Proxy support missing but proxy use was requested!')
        msg = "getaddrinfo returns an empty list"
        # Try each address returned by getaddrinfo until one connects.
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    # NOTE(review): TCP_NODELAY is only set on direct
                    # sockets, never on proxied ones -- confirm intended.
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)

                self.sock.connect(sa)
            except socket.error, msg:
                # This address failed; clean up and try the next one.
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # All addresses failed: re-raise the last socket error seen.
        if not self.sock:
            raise socket.error, msg
768
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    """
    This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """
    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                cert_file=cert_file, strict=strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # NOTE(review): unlike HTTPConnectionWithTimeout.connect, there is
        # no ProxiesUnavailableError guard here when socks is None.
        msg = "getaddrinfo returns an empty list"
        # Try each address returned by getaddrinfo until one connects.
        for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo(
            self.host, self.port, 0, socket.SOCK_STREAM):
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    sock = socks.socksocket(family, socktype, proto)
                    sock.setproxy(*self.proxy_info.astuple())
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))
                # Wrap in SSL only after the TCP connection succeeds.
                self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file)
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                # NOTE(review): on failure only self.sock (often None) is
                # closed; the partially-connected local 'sock' is leaked.
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # All addresses failed: re-raise the last socket error seen.
        if not self.sock:
            raise socket.error, msg
Joe Gregorio845a5452010-09-08 13:50:34 -0400815
816
817
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """
    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """
        The value of proxy_info is a ProxyInfo instance.

        If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout
        """
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects accumulated from successful challenges
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, cached ETags are not sent as cache validators.
        self.ignore_etag = False

        # If True, exceptions during request() are converted into synthetic
        # 4xx/5xx responses instead of propagating (see request()).
        self.force_exception_to_status_code = False

        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests, one per matching credential
        and supported challenge scheme (tried in AUTH_SCHEME_ORDER).
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue the request on 'conn', retrying once on a dropped/stale
        # connection. Returns (Response, content-string); the body is
        # decompressed for everything except HEAD.
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error, e:
                err = 0
                if hasattr(e, 'args'):
                    err = getattr(e, 'args')[0]
                else:
                    err = e.errno
                if err == errno.ECONNREFUSED: # Connection refused
                    raise
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if conn.sock is None:
                    if i == 0:
                        # First attempt on a dead socket: reconnect and retry.
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                pass
            try:
                response = conn.getresponse()
            except (socket.error, httplib.HTTPException):
                if i == 0:
                    # The connection may have gone stale; retry once.
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = ""
                if method == "HEAD":
                    # HEAD has no body; just release the connection.
                    response.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)
            break
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-scoped) stored authorization, if any.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Give the auth object a chance to react (e.g. a stale Digest
            # nonce); if it asks, re-send the request once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one succeeds.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    # Remember the working authorization for future requests.
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        # 303 is always followed; other redirects only for safe methods
        # unless follow_all_redirects is set.
        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Cache the permanent redirect so future requests can
                        # skip straight to the new URL.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Don't carry conditional-request validators through the redirect.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        # Per RFC 2616, a 303 response is fetched with GET.
                        redirect_method = method
                        if response.status == 303:
                            redirect_method = "GET"
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        # Keep the redirect chain reachable via .previous.
                        response.previous = old_response
                else:
                    raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
        elif response.status in [200, 203] and method == "GET":
            # Don't cache 206's since we aren't going to handle byte range requests
            if not response.has_key('content-location'):
                response['content-location'] = absolute_uri
            _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        # Thin wrapper over the module-level helper so subclasses can override.
        return _normalize_headers(headers)

# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST,
        DELETE, etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is
        a string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            # Treat http://host:443/ as https://host/.
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            # Reuse one connection per scheme+authority.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    # Use the first registered client certificate for this authority.
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            # Advertise compression unless the caller set its own preference
            # or is making a range request (ranges over compressed bodies
            # don't compose).
            if 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                    try:
                        # Cached entries are stored as "headers\r\n\r\nbody".
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        # Corrupt cache entry; drop it and fall through to
                        # an uncached request.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary = info['vary']
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    # The cached entry records each varied request header
                    # under a '-varied-<name>' key.
                    key = '-varied-%s' % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            # only-if-cached with no entry: synthesize a 504.
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        # Add cache validators so the server may answer 304.
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                    if response.status == 304 and method == "GET":
                        # Rewrite the cache entry with the new end-to-end headers
                        # Take all headers that are in response
                        # and overwrite their values in info.
                        # unless they are hop-by-hop, or are listed in the connection header.

                        for key in _get_end2end_headers(response):
                            info[key] = response[key]
                        merged_response = Response(info)
                        if hasattr(response, "_stale_digest"):
                            merged_response._stale_digest = response._stale_digest
                        _updateCache(headers, merged_response, content, self.cache, cachekey)
                        response = merged_response
                        # Callers see the cached body with a 200, not the 304.
                        response.status = 200
                        response.fromcache = True

                    elif response.status == 200:
                        content = new_content
                    else:
                        # Error response: the cached entry is no longer valid.
                        self.cache.delete(cachekey)
                        content = new_content
            else:
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    # RFC 2616 14.9.4: nothing cached, so answer 504.
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic Response instead of
                # letting the exception propagate to the caller.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)
1231
1232
1233
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse.

    Behaves as a dict of lower-cased header names to values, with a few
    extra attributes describing the response itself.
    """

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    # The previous Response in a redirect chain, or None.
    previous = None

    def __init__(self, info):
        # info is either an email.Message or
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            for key, value in info.getheaders():
                # Header names are normalized to lower case for lookup.
                self[key.lower()] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            # e.g. headers parsed back out of a cache entry.
            for key, value in info.items():
                self[key] = value
            self.status = int(self['status'])
        else:
            # Any other mapping (e.g. a plain dict of headers).
            for key, value in info.iteritems():
                self[key] = value
            self.status = int(self.get('status', self.status))


    def __getattr__(self, name):
        # Backwards compatibility: expose the headers as a '.dict' attribute.
        if name == 'dict':
            return self
        else:
            raise AttributeError, name