| \section{\module{urllib2} --- |
| extensible library for opening URLs} |
| |
| \declaremodule{standard}{urllib2} |
| |
| \moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net} |
| \sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net} |
| |
| \modulesynopsis{An extensible library for opening URLs using a variety of |
| protocols} |
| |
| The \module{urllib2} module defines functions and classes which help |
| in opening URLs (mostly HTTP) in a complex world --- basic and digest |
| authentication, redirections and more. |
| |
| The \module{urllib2} module defines the following functions: |
| |
| \begin{funcdesc}{urlopen}{url\optional{, data}} |
| Open the url \var{url}, which can be either a string or a \class{Request} |
| object (currently the code checks that it really is a \class{Request} |
| instance, or an instance of a subclass of \class{Request}). |
| |
| \var{data} should be a string, which specifies additional data to |
| send to the server. In HTTP requests, which are the only ones that |
| support \var{data}, it should be a buffer in the format of |
| \code{application/x-www-form-urlencoded}, for example one returned |
| from \function{urllib.urlencode}. |
| |
| This function returns a file-like object with two additional methods: |
| |
| \begin{itemize} |
| |
| \item \code{geturl()} --- return the URL of the resource retrieved |
| \item \code{info()} --- return the meta-information of the page, as |
| a dictionary-like object |
| \end{itemize} |
| |
| Raises \exception{URLError} on errors. |
| \end{funcdesc} |
| |
| \begin{funcdesc}{install_opener}{opener} |
| Install an \class{OpenerDirector} instance as the default opener. |
| The code does not check for a real \class{OpenerDirector}, and any |
| class with the appropriate interface will work. |
| \end{funcdesc} |
| |
| \begin{funcdesc}{build_opener}{\optional{handler\optional{, |
| handler\optional{, ...}}}} |
| Return an \class{OpenerDirector} instance, which chains the |
| handlers in the order given. \var{handler}s can be either instances |
| of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in |
| which case it must be possible to call the constructor without |
| any parameters). Instances of the following classes will be in |
| the front of the \var{handler}s, unless the \var{handler}s contain |
| them, instances of them or subclasses of them: |
| |
| \code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, |
| HTTPRedirectHandler, FTPHandler, FileHandler} |
| |
| If the Python installation has SSL support (\code{socket.ssl} exists), |
| \class{HTTPSHandler} will also be added. |
| \end{funcdesc} |
| |
| \begin{excdesc}{URLError} |
| The handlers raise this exception when they run into a problem. It is a |
| subclass of \exception{IOError}. |
| \end{excdesc} |
| |
| \begin{excdesc}{HTTPError} |
| A subclass of \exception{URLError}, it can also function as a |
| non-exceptional file-like return value (the same thing that \function{urlopen} |
| returns). This is useful when handling exotic HTTP errors, such as |
| requests for authentication. |
| \end{excdesc} |
| |
| \begin{excdesc}{GopherError} |
| A subclass of \exception{URLError}, this is the error raised by the |
| Gopher handler. |
| \end{excdesc} |
| |
| \begin{classdesc}{Request}{url\optional{, data\optional{, headers}}} |
| This class is an abstraction of a URL request. |
| |
| \var{url} should be a string which is a valid URL. For a description |
| of \var{data} see the \method{add_data} description. |
| \var{headers} should be a dictionary, and will be treated as if |
| \method{add_header} was called with each key and value as arguments. |
| \end{classdesc} |
| |
| The following methods describe all of \class{Request}'s public interface, |
| and so all must be overridden in subclasses. |
| |
| \begin{methoddesc}[Request]{add_data}{data} |
| Set the \class{Request} data to \var{data}. This is ignored |
| by all handlers except HTTP handlers --- and there it should be an |
| \code{application/x-www-form-urlencoded} buffer, and will change the |
| request to be \code{POST} rather than \code{GET}. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{has_data}{} |
| Return whether the instance has a non-\code{None} data. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{get_data}{} |
| Return the instance's data. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{add_header}{key, val} |
| Add another header to the request. Headers |
| are currently ignored by all handlers except HTTP handlers, where they |
| are added to the list of headers sent to the server. Note that there |
| cannot be more than one header with the same name, and later calls |
| will overwrite previous calls in case the \var{key} collides. Currently, |
| this is no loss of HTTP functionality, since all headers which have meaning |
| when used more than once have a (header-specific) way of gaining the |
| same functionality using only one header. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{get_full_url}{} |
| Return the URL given in the constructor. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{get_type}{} |
| Return the type of the URL --- also known as the scheme. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{get_host}{} |
| Return the host to which the connection will be made. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{get_selector}{} |
| Return the selector --- the part of the URL that is sent to |
| the server. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[Request]{set_proxy}{host, type} |
| Make the request by connecting to a proxy server. The \var{host} and \var{type} |
| will replace those of the instance, and the instance's selector will be |
| the original URL given in the constructor. |
| \end{methoddesc} |
| |
| \begin{classdesc}{OpenerDirector}{} |
| The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s chained |
| together. It manages the chaining of handlers, and recovery from errors. |
| \end{classdesc} |
| |
| \begin{methoddesc}[OpenerDirector]{add_handler}{handler} |
| \var{handler} should be an instance of \class{BaseHandler}. The following |
| methods are searched, and added to the possible chains. |
| |
| \begin{itemize} |
| \item \code{{\em protocol}_open} --- signal that the handler knows how |
| to open {\em protocol} URLs. |
| \item \code{{\em protocol}_error_{\em type}} --- signal that the handler |
| knows how to handle {\em type} |
| errors from {\em protocol}. |
| \end{itemize} |
| |
| \end{methoddesc} |
| |
| \begin{methoddesc}[OpenerDirector]{close}{} |
| Explicitly break cycles, and delete all the handlers. |
| Because the \class{OpenerDirector} needs to know the registered handlers, |
| and a handler needs to know which \class{OpenerDirector} called |
| it, there is a reference cycle. Even though recent versions of Python |
| have cycle-collection, it is sometimes preferable to explicitly break |
| the cycles. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}} |
| Open the given \var{url} (which can be a request object or a string), |
| optionally passing the given \var{data}. |
| Arguments, return values and exceptions raised are the same as those |
| of \function{urlopen} (which simply calls the \method{open()} method |
| on the default installed \class{OpenerDirector}). |
| \end{methoddesc} |
| |
| \begin{methoddesc}[OpenerDirector]{error}{proto\optional{, arg\optional{, ...}}} |
| Handle an error in a given protocol. The HTTP protocol is special cased to |
| use the code as the error. This will call the registered error handlers |
| for the given protocol with the given arguments (which are protocol specific). |
| |
| Return values and exceptions raised are the same as those |
| of \function{urlopen}. |
| \end{methoddesc} |
| |
| \begin{classdesc}{BaseHandler}{} |
| This is the base class for all registered handlers --- and handles only |
| the simple mechanics of registration. |
| \end{classdesc} |
| |
| \begin{methoddesc}[BaseHandler]{add_parent}{director} |
| Add a director as parent. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[BaseHandler]{close}{} |
| Remove any parents. |
| \end{methoddesc} |
| |
| The following members and methods should be used only by classes derived |
| from \class{BaseHandler}: |
| |
| \begin{memberdesc}[BaseHandler]{parent} |
| A valid \class{OpenerDirector}, which can be used to open using a different |
| protocol, or handle errors. |
| \end{memberdesc} |
| |
| \begin{methoddesc}[BaseHandler]{default_open}{req} |
| This method is {\em not} defined in \class{BaseHandler}, but subclasses |
| should define it if they want to catch all URLs. |
| |
| This method, if it exists, will be called by the \member{parent} |
| \class{OpenerDirector}. It should return a file-like object as described |
| in the return value of the \method{open} of \class{OpenerDirector} or |
| \code{None}. It should raise \exception{URLError}, unless a truly exceptional |
| thing happens (for example, \exception{MemoryError} should not be mapped |
| to \exception{URLError}). |
| |
| This method will be called before any protocol-specific open method. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[BaseHandler]{{\em protocol}_open}{req} |
| This method is {\em not} defined in \class{BaseHandler}, but subclasses |
| should define it if they want to handle URLs with the given protocol. |
| |
| This method, if it exists, will be called by the \member{parent} |
| \class{OpenerDirector}. Return values should be the same as for |
| \method{default_open}. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[BaseHandler]{unknown_open}{req} |
| This method is {\em not} defined in \class{BaseHandler}, but subclasses |
| should define it if they want to catch all URLs with no specific |
| registered handler to open it. |
| |
| This method, if it exists, will be called by the \member{parent} |
| \class{OpenerDirector}. Return values should be the same as for |
| \method{default_open}. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs} |
| This method is {\em not} defined in \class{BaseHandler}, but subclasses |
| should override it if they intend to provide a catch-all for otherwise |
| unhandled HTTP errors. It will be called automatically by the |
| \class{OpenerDirector} getting the error, and should not normally be called |
| in other circumstances. |
| |
| \var{req} will be a \class{Request} object, \var{fp} will be a file-like |
| object with the HTTP error body, \var{code} will be the three-digit code |
| of the error, \var{msg} will be the user-visible explanation of the |
| code and \var{hdrs} will be a dictionary-like object with the headers of |
| the error. |
| |
| Return values and exceptions raised should be the same as those |
| of \function{urlopen}. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[BaseHandler]{http_error_{\em nnn}}{req, fp, code, msg, hdrs} |
| \code{nnn} should be a three-digit HTTP error code. This method is also |
| not defined in \class{BaseHandler}, but will be called, if it exists, on |
| an instance of a subclass, when an HTTP error with code \code{nnn} occurs. |
| |
| Subclasses should override this method to handle specific HTTP errors. |
| |
| Arguments, return values and exceptions raised should be the same as for |
| \method{http_error_default}. |
| \end{methoddesc} |
| |
| |
| \begin{classdesc}{HTTPDefaultErrorHandler}{} |
| A class which catches all HTTP errors. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPDefaultErrorHandler]{http_error_default}{req, fp, code, |
| msg, hdrs} |
| Raise an \exception{HTTPError}. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPRedirectHandler}{} |
| A class to handle redirections. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req, fp, code, |
| msg, hdrs} |
| Redirect to the \code{Location:} URL. This method gets called by |
| the parent \class{OpenerDirector} when getting an HTTP permanent-redirect |
| error. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req, fp, code, |
| msg, hdrs} |
| The same as \method{http_error_301}. |
| \end{methoddesc} |
| |
| \strong{Note:} 303 redirection is not supported by this version of |
| \module{urllib2}. |
| |
| \begin{classdesc}{ProxyHandler}{\optional{proxies}} |
| Cause requests to go through a proxy. |
| If \var{proxies} is given, it must be a dictionary mapping |
| protocol names to URLs of proxies. |
| The default is to read the list of proxies from the environment |
| variables \code{{\em protocol}_proxy}. |
| \end{classdesc} |
| |
| \begin{methoddesc}[ProxyHandler]{{\em protocol}_open}{request} |
| The \class{ProxyHandler} will have a method \code{{\em protocol}_open} for |
| every {\em protocol} which has a proxy in the \var{proxies} dictionary |
| given in the constructor. The method will modify requests to go |
| through the proxy, by calling \code{request.set_proxy()}, and call the next |
| handler in the chain to actually execute the protocol. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPPasswordMgr}{} |
| Keep a database of |
| \code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mapping. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd} |
| \var{uri} can be either a single URI, or a sequence of URIs. \var{realm}, |
| \var{user} and \var{passwd} must be strings. This causes |
| \code{(\var{user}, \var{passwd})} to be used as authentication tokens |
| when authentication for \var{realm} and a super-URI of any of the |
| given URIs is given. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri} |
| Get user/password for given realm and URI, if any. This method will |
| return \code{(None, None)} if no user/password is known. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{} |
| Keep a database of |
| \code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mapping. |
| A realm of \code{None} is considered a catch-all realm, which is searched |
| if no other realm fits. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPPasswordMgrWithDefaultRealm]{add_password} |
| {realm, uri, user, passwd} |
| \var{uri} can be either a single URI, or a sequence of URIs. \var{realm}, |
| \var{user} and \var{passwd} must be strings. This causes |
| \code{(\var{user}, \var{passwd})} to be used as authentication tokens |
| when authentication for \var{realm} and a super-URI of any of the |
| given URIs is given. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[HTTPPasswordMgrWithDefaultRealm]{find_user_password}{realm, authuri} |
| Get user/password for given realm and URI, if any. This method will |
| return \code{(None, None)} if no user/password is known. |
| If the given \var{realm} has no user/password, the realm \code{None} |
| will be searched. |
| \end{methoddesc} |
| |
| \begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}} |
| This is a mixin class, that helps with HTTP authentication, both |
| to the remote host and to a proxy. |
| |
| \var{password_mgr} should be something that is compatible with |
| \class{HTTPPasswordMgr} --- supplies the documented interface above. |
| \end{classdesc} |
| |
| \begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request} |
| {authreq, host, req, headers} |
| Handle an authentication request by getting user/password pair, and retrying. |
| \var{authreq} should be the name of the header where the information about |
| the realm is included, \var{host} is the host to authenticate to, \var{req} should be the |
| (failed) \class{Request} object, and \var{headers} should be the error headers. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}} |
| Handle authentication with the remote host. |
| Valid \var{password_mgr}, if given, are the same as for |
| \class{AbstractBasicAuthHandler}. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code, |
| msg, hdrs} |
| Retry the request with authentication info, if available. |
| \end{methoddesc} |
| |
| \begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}} |
| Handle authentication with the proxy. |
| Valid \var{password_mgr}, if given, are the same as for |
| \class{AbstractBasicAuthHandler}. |
| \end{classdesc} |
| |
| \begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code, |
| msg, hdrs} |
| Retry the request with authentication info, if available. |
| \end{methoddesc} |
| |
| \begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}} |
| This is a mixin class, that helps with HTTP authentication, both |
| to the remote host and to a proxy. |
| |
| \var{password_mgr} should be something that is compatible with |
| \class{HTTPPasswordMgr} --- supplies the documented interface above. |
| \end{classdesc} |
| |
| \begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request} |
| {authreq, host, req, headers} |
| \var{authreq} should be the name of the header where the information about |
| the realm is included, \var{host} should be the host to authenticate to, \var{req} |
| should be the (failed) \class{Request} object, and \var{headers} should be the |
| error headers. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}} |
| Handle authentication with the remote host. |
| Valid \var{password_mgr}, if given, are the same as for |
| \class{AbstractDigestAuthHandler}. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code, |
| msg, hdrs} |
| Retry the request with authentication info, if available. |
| \end{methoddesc} |
| |
| \begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}} |
| Handle authentication with the proxy. |
| \var{password_mgr}, if given, should be the same as for |
| the constructor of \class{AbstractDigestAuthHandler}. |
| \end{classdesc} |
| |
| \begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code, |
| msg, hdrs} |
| Retry the request with authentication info, if available. |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPHandler}{} |
| A class to handle opening of HTTP URLs. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPHandler]{http_open}{req} |
| Send an HTTP request (either GET or POST, depending on |
| \code{req.has_data()}). |
| \end{methoddesc} |
| |
| \begin{classdesc}{HTTPSHandler}{} |
| A class to handle opening of HTTPS URLs. |
| \end{classdesc} |
| |
| \begin{methoddesc}[HTTPSHandler]{https_open}{req} |
| Send an HTTPS request (either GET or POST, depending on |
| \code{req.has_data()}). |
| \end{methoddesc} |
| |
| \begin{classdesc}{UnknownHandler}{} |
| A catch-all class to handle unknown URLs. |
| \end{classdesc} |
| |
| \begin{methoddesc}[UnknownHandler]{unknown_open}{req} |
| Raise a \exception{URLError} exception. |
| \end{methoddesc} |
| |
| \begin{classdesc}{FileHandler}{} |
| Open local files. |
| \end{classdesc} |
| |
| \begin{methoddesc}[FileHandler]{file_open}{req} |
| Open the file locally, if there is no host name, or |
| the host name is \code{"localhost"}. Change the |
| protocol to \code{ftp} otherwise, and retry opening |
| it using \member{parent}. |
| \end{methoddesc} |
| |
| \begin{classdesc}{FTPHandler}{} |
| Open FTP URLs. |
| \end{classdesc} |
| |
| \begin{methoddesc}[FTPHandler]{ftp_open}{req} |
| Open the FTP file indicated by \var{req}. |
| The login is always done with empty username and password. |
| \end{methoddesc} |
| |
| \begin{classdesc}{CacheFTPHandler}{} |
| Open FTP URLs, keeping a cache of open FTP connections to minimize |
| delays. |
| \end{classdesc} |
| |
| \begin{methoddesc}[CacheFTPHandler]{ftp_open}{req} |
| Open the FTP file indicated by \var{req}. |
| The login is always done with empty username and password. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[CacheFTPHandler]{setTimeout}{t} |
| Set timeout of connections to \var{t} seconds. |
| \end{methoddesc} |
| |
| \begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m} |
| Set maximum number of cached connections to \var{m}. |
| \end{methoddesc} |
| |
| \begin{classdesc}{GopherHandler}{} |
| Open gopher URLs. |
| \end{classdesc} |
| |
| \begin{methoddesc}[GopherHandler]{gopher_open}{req} |
| Open the gopher resource indicated by \var{req}. |
| \end{methoddesc} |