Simplify the Request class. The basic components of the parsed Request are now available as public attributes, e.g. full_url and host. The accessor methods are deprecated. The implementation replaces the complicated __getattr__ machinery with a _parse() method. The response from an HTTP request is now an HTTPResponse instance instead of an addinfourl() wrapper instance. The wrapper provided minimal extra functionality and was undocumented. The API of addinfourl() was preserved, except for close hooks, by adding a few methods and public attributes to the HTTPResponse class.

commit: 6c5e28c383bf587f80d01e52f887801be200200d [log] [tgz]
author: Jeremy Hylton <jeremy@alum.mit.edu> Tue Mar 31 14:35:53 2009 +0000
committer: Jeremy Hylton <jeremy@alum.mit.edu> Tue Mar 31 14:35:53 2009 +0000
tree: 6f8485b2ea4820facd7049320142e40028658494
parent: 16caab00a259c1a01f8e47abdd321c42b1b6e554 [diff] [blame]
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index c789ffc..3776536 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py

@@ -1,6 +1,3 @@
-# Issues in merging urllib and urllib2:
-# 1. They both define a function named urlopen()
-
 """An extensible library for opening URLs using a variety of protocols
 
 The simplest way to use this module is to call the urlopen function,
@@ -83,6 +80,7 @@
 # abstract factory for opener
 
 import base64
+import bisect
 import email
 import hashlib
 import http.client
@@ -94,7 +92,6 @@
 import socket
 import sys
 import time
-import bisect
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -149,7 +146,7 @@
     comparison.
 
     """
-    url = request.get_full_url()
+    url = request.full_url
     host = urlparse(url)[1]
     if host == "":
         host = request.get_header("Host", "")
@@ -163,11 +160,7 @@
     def __init__(self, url, data=None, headers={},
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
-        self.__original = unwrap(url)
-        self.type = None
-        # self.__r_type is what's left after doing the splittype
-        self.host = None
-        self.port = None
+        self.full_url = unwrap(url)
         self.data = data
         self.headers = {}
         for key, value in headers.items():
@@ -177,26 +170,23 @@
             origin_req_host = request_host(self)
         self.origin_req_host = origin_req_host
         self.unverifiable = unverifiable
+        self._parse()
 
-    def __getattr__(self, attr):
-        # XXX this is a fallback mechanism to guard against these
-        # methods getting called in a non-standard order.  this may be
-        # too complicated and/or unnecessary.
-        # XXX should the __r_XXX attributes be public?
-        if attr[:12] == '_Request__r_':
-            name = attr[12:]
-            if hasattr(Request, 'get_' + name):
-                getattr(self, 'get_' + name)()
-                return getattr(self, attr)
-        raise AttributeError(attr)
+    def _parse(self):
+        self.type, rest = splittype(self.full_url)
+        if self.type is None:
+            raise ValueError("unknown url type: %s" % self.full_url)
+        self.host, self.selector = splithost(rest)
+        if self.host:
+            self.host = unquote(self.host)
 
     def get_method(self):
-        if self.has_data():
+        if self.data is not None:
             return "POST"
         else:
             return "GET"
 
-    # XXX these helper methods are lame
+    # Begin deprecated methods
 
     def add_data(self, data):
         self.data = data
@@ -208,37 +198,31 @@
         return self.data
 
     def get_full_url(self):
-        return self.__original
+        return self.full_url
 
     def get_type(self):
-        if self.type is None:
-            self.type, self.__r_type = splittype(self.__original)
-            if self.type is None:
-                raise ValueError("unknown url type: %s" % self.__original)
         return self.type
 
     def get_host(self):
-        if self.host is None:
-            self.host, self.__r_host = splithost(self.__r_type)
-            if self.host:
-                self.host = unquote(self.host)
         return self.host
 
     def get_selector(self):
-        return self.__r_host
+        return self.selector
 
-    def set_proxy(self, host, type):
-        self.host, self.type = host, type
-        self.__r_host = self.__original
-
-    def has_proxy(self):
-        return self.__r_host == self.__original
+    def is_unverifiable(self):
+        return self.unverifiable
 
     def get_origin_req_host(self):
         return self.origin_req_host
 
-    def is_unverifiable(self):
-        return self.unverifiable
+    # End deprecated methods
+
+    def set_proxy(self, host, type):
+        self.host, self.type = host, type
+        self.selector = self.full_url
+
+    def has_proxy(self):
+        return self.selector == self.full_url
 
     def add_header(self, key, val):
         # useful for something like authentication
@@ -344,10 +328,10 @@
         else:
             req = fullurl
             if data is not None:
-                req.add_data(data)
+                req.data = data
 
         req.timeout = timeout
-        protocol = req.get_type()
+        protocol = req.type
 
         # pre-process request
         meth_name = protocol+"_request"
@@ -371,7 +355,7 @@
         if result:
             return result
 
-        protocol = req.get_type()
+        protocol = req.type
         result = self._call_chain(self.handle_open, protocol, protocol +
                                   '_open', req)
         if result:
@@ -481,7 +465,7 @@
 
 class HTTPDefaultErrorHandler(BaseHandler):
     def http_error_default(self, req, fp, code, msg, hdrs):
-        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+        raise HTTPError(req.full_url, code, msg, hdrs, fp)
 
 class HTTPRedirectHandler(BaseHandler):
     # maximum number of redirections to any single URL
@@ -504,7 +488,7 @@
         m = req.get_method()
         if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
             or code in (301, 302, 303) and m == "POST")):
-            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+            raise HTTPError(req.full_url, code, msg, headers, fp)
 
         # Strictly (according to RFC 2616), 301 or 302 in response to
         # a POST MUST NOT cause a redirection without confirmation
@@ -518,7 +502,7 @@
                           if k.lower() not in CONTENT_HEADERS)
         return Request(newurl,
                        headers=newheaders,
-                       origin_req_host=req.get_origin_req_host(),
+                       origin_req_host=req.origin_req_host,
                        unverifiable=True)
 
     # Implementation note: To avoid the server sending us into an
@@ -542,7 +526,7 @@
             urlparts[2] = "/"
         newurl = urlunparse(urlparts)
 
-        newurl = urljoin(req.get_full_url(), newurl)
+        newurl = urljoin(req.full_url, newurl)
 
         # XXX Probably want to forget about the state of the current
         # request, although that might interact poorly with other
@@ -557,7 +541,7 @@
             visited = new.redirect_dict = req.redirect_dict
             if (visited.get(newurl, 0) >= self.max_repeats or
                 len(visited) >= self.max_redirections):
-                raise HTTPError(req.get_full_url(), code,
+                raise HTTPError(req.full_url, code,
                                 self.inf_msg + msg, headers, fp)
         else:
             visited = new.redirect_dict = req.redirect_dict = {}
@@ -664,7 +648,7 @@
                     meth(r, proxy, type))
 
     def proxy_open(self, req, proxy, type):
-        orig_type = req.get_type()
+        orig_type = req.type
         proxy_type, user, password, hostport = _parse_proxy(proxy)
         if proxy_type is None:
             proxy_type = orig_type
@@ -811,7 +795,7 @@
     auth_header = 'Authorization'
 
     def http_error_401(self, req, fp, code, msg, headers):
-        url = req.get_full_url()
+        url = req.full_url
         return self.http_error_auth_reqed('www-authenticate',
                                           url, req, headers)
 
@@ -825,7 +809,7 @@
         # authority.  Assume there isn't one, since urllib.request does not (and
         # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
         # userinfo.
-        authority = req.get_host()
+        authority = req.host
         return self.http_error_auth_reqed('proxy-authenticate',
                                           authority, req, headers)
 
@@ -864,7 +848,7 @@
             # prompting for the information. Crap. This isn't great
             # but it's better than the current 'repeat until recursion
             # depth exceeded' approach <wink>
-            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
+            raise HTTPError(req.full_url, 401, "digest auth failed",
                             headers, None)
         else:
             self.retried += 1
@@ -912,20 +896,20 @@
         if H is None:
             return None
 
-        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+        user, pw = self.passwd.find_user_password(realm, req.full_url)
         if user is None:
             return None
 
         # XXX not implemented yet
-        if req.has_data():
-            entdig = self.get_entity_digest(req.get_data(), chal)
+        if req.data is not None:
+            entdig = self.get_entity_digest(req.data, chal)
         else:
             entdig = None
 
         A1 = "%s:%s:%s" % (user, realm, pw)
         A2 = "%s:%s" % (req.get_method(),
                         # XXX selector: what about proxies and full urls
-                        req.get_selector())
+                        req.selector)
         if qop == 'auth':
             self.nonce_count += 1
             ncvalue = '%08x' % self.nonce_count
@@ -941,7 +925,7 @@
         # XXX should the partial digests be encoded too?
 
         base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
-               'response="%s"' % (user, realm, nonce, req.get_selector(),
+               'response="%s"' % (user, realm, nonce, req.selector,
                                   respdig)
         if opaque:
             base += ', opaque="%s"' % opaque
@@ -978,7 +962,7 @@
     handler_order = 490  # before Basic auth
 
     def http_error_401(self, req, fp, code, msg, headers):
-        host = urlparse(req.get_full_url())[1]
+        host = urlparse(req.full_url)[1]
         retry = self.http_error_auth_reqed('www-authenticate',
                                            host, req, headers)
         self.reset_retry_count()
@@ -991,7 +975,7 @@
     handler_order = 490  # before Basic auth
 
     def http_error_407(self, req, fp, code, msg, headers):
-        host = req.get_host()
+        host = req.host
         retry = self.http_error_auth_reqed('proxy-authenticate',
                                            host, req, headers)
         self.reset_retry_count()
@@ -1006,12 +990,12 @@
         self._debuglevel = level
 
     def do_request_(self, request):
-        host = request.get_host()
+        host = request.host
         if not host:
             raise URLError('no host given')
 
-        if request.has_data():  # POST
-            data = request.get_data()
+        if request.data is not None:  # POST
+            data = request.data
             if not request.has_header('Content-type'):
                 request.add_unredirected_header(
                     'Content-type',
@@ -1022,7 +1006,7 @@
 
         sel_host = host
         if request.has_proxy():
-            scheme, sel = splittype(request.get_selector())
+            scheme, sel = splittype(request.selector)
             sel_host, sel_path = splithost(sel)
         if not request.has_header('Host'):
             request.add_unredirected_header('Host', sel_host)
@@ -1034,16 +1018,11 @@
         return request
 
     def do_open(self, http_class, req):
-        """Return an addinfourl object for the request, using http_class.
+        """Return an HTTPResponse object for the request, using http_class.
 
         http_class must implement the HTTPConnection API from http.client.
-        The addinfourl return value is a file-like object.  It also
-        has methods and attributes including:
-            - info(): return a email Message object for the headers
-            - geturl(): return the original request URL
-            - code: HTTP status code
         """
-        host = req.get_host()
+        host = req.host
         if not host:
             raise URLError('no host given')
 
@@ -1061,19 +1040,21 @@
         # So make sure the connection gets closed after the (only)
         # request.
         headers["Connection"] = "close"
-        headers = dict(
-            (name.title(), val) for name, val in headers.items())
+        headers = dict((name.title(), val) for name, val in headers.items())
         try:
-            h.request(req.get_method(), req.get_selector(), req.data, headers)
-            r = h.getresponse()
-        except socket.error as err: # XXX what error?
+            h.request(req.get_method(), req.selector, req.data, headers)
+            r = h.getresponse()  # an HTTPResponse instance
+        except socket.error as err:
             raise URLError(err)
 
-##        resp = addinfourl(r.fp, r.msg, req.get_full_url())
-        resp = addinfourl(r, r.msg, req.get_full_url())
-        resp.code = r.status
-        resp.msg = r.reason
-        return resp
+        r.url = req.full_url
+        # This line replaces the .msg attribute of the HTTPResponse
+        # with .headers, because urllib clients expect the response to
+        # have the reason in .msg.  It would be good to mark this
+        # attribute as deprecated and get them to use info() or
+        # .headers.
+        r.msg = r.reason
+        return r
 
 
 class HTTPHandler(AbstractHTTPHandler):
@@ -1111,7 +1092,7 @@
 
 class UnknownHandler(BaseHandler):
     def unknown_open(self, req):
-        type = req.get_type()
+        type = req.type
         raise URLError('unknown url type: %s' % type)
 
 def parse_keqv_list(l):
@@ -1170,7 +1151,7 @@
 class FileHandler(BaseHandler):
     # Use local file or FTP depending on form of URL
     def file_open(self, req):
-        url = req.get_selector()
+        url = req.selector
         if url[:2] == '//' and url[2:3] != '/':
             req.type = 'ftp'
             return self.parent.open(req)
@@ -1192,8 +1173,8 @@
     def open_local_file(self, req):
         import email.utils
         import mimetypes
-        host = req.get_host()
-        file = req.get_selector()
+        host = req.host
+        file = req.selector
         localfile = url2pathname(file)
         try:
             stats = os.stat(localfile)
@@ -1223,7 +1204,7 @@
     def ftp_open(self, req):
         import ftplib
         import mimetypes
-        host = req.get_host()
+        host = req.host
         if not host:
             raise URLError('ftp error: no host given')
         host, port = splitport(host)
@@ -1246,7 +1227,7 @@
             host = socket.gethostbyname(host)
         except socket.error as msg:
             raise URLError(msg)
-        path, attrs = splitattr(req.get_selector())
+        path, attrs = splitattr(req.selector)
         dirs = path.split('/')
         dirs = list(map(unquote, dirs))
         dirs, file = dirs[:-1], dirs[-1]
@@ -1262,13 +1243,13 @@
                     type = value.upper()
             fp, retrlen = fw.retrfile(file, type)
             headers = ""
-            mtype = mimetypes.guess_type(req.get_full_url())[0]
+            mtype = mimetypes.guess_type(req.full_url)[0]
             if mtype:
                 headers += "Content-type: %s\n" % mtype
             if retrlen is not None and retrlen >= 0:
                 headers += "Content-length: %d\n" % retrlen
             headers = email.message_from_string(headers)
-            return addinfourl(fp, headers, req.get_full_url())
+            return addinfourl(fp, headers, req.full_url)
         except ftplib.all_errors as msg:
             exc = URLError('ftp error: %s' % msg)
             raise exc.with_traceback(sys.exc_info()[2])
@@ -1581,9 +1562,9 @@
         else:
             auth = None
         http_conn = connection_factory(host)
-        # XXX We should fix urllib so that it works with HTTP/1.1.
-        http_conn._http_vsn = 10
-        http_conn._http_vsn_str = "HTTP/1.0"
+##        # XXX We should fix urllib so that it works with HTTP/1.1.
+##        http_conn._http_vsn = 10
+##        http_conn._http_vsn_str = "HTTP/1.0"
 
         headers = {}
         if proxy_auth:
commit	6c5e28c383bf587f80d01e52f887801be200200d	[log] [tgz]
author	Jeremy Hylton <jeremy@alum.mit.edu>	Tue Mar 31 14:35:53 2009 +0000
committer	Jeremy Hylton <jeremy@alum.mit.edu>	Tue Mar 31 14:35:53 2009 +0000
tree	6f8485b2ea4820facd7049320142e40028658494
parent	16caab00a259c1a01f8e47abdd321c42b1b6e554 [diff] [blame]