blob: 73b77bda81e78d967b7c0b98a9b65c5e405d07ae [file] [log] [blame]
Moshe Zadka8a18e992001-03-01 08:40:42 +00001\section{\module{urllib2} ---
2 extensible library for opening URLs}
3
4\declaremodule{standard}{urllib2}
Moshe Zadka8a18e992001-03-01 08:40:42 +00005\moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net}
6\sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net}
7
8\modulesynopsis{An extensible library for opening URLs using a variety of
9 protocols}
10
11The \module{urllib2} module defines functions and classes which help
Fred Drake93c86712001-03-02 20:39:34 +000012in opening URLs (mostly HTTP) in a complex world --- basic and digest
Moshe Zadka8a18e992001-03-01 08:40:42 +000013authentication, redirections and more.
14
15The \module{urllib2} module defines the following functions:
16
17\begin{funcdesc}{urlopen}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +000018Open the URL \var{url}, which can be either a string or a \class{Request}
Moshe Zadka8a18e992001-03-01 08:40:42 +000019object (currently the code checks that it really is a \class{Request}
Fred Drake93c86712001-03-02 20:39:34 +000020instance, or an instance of a subclass of \class{Request}).
Moshe Zadka8a18e992001-03-01 08:40:42 +000021
22\var{data} should be a string, which specifies additional data to
23send to the server. In HTTP requests, which are the only ones that
24support \var{data}, it should be a buffer in the format of
Fred Drake93c86712001-03-02 20:39:34 +000025\mimetype{application/x-www-form-urlencoded}, for example one returned
26from \function{urllib.urlencode()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000027
28This function returns a file-like object with two additional methods:
29
30\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +000031 \item \method{geturl()} --- return the URL of the resource retrieved
32 \item \method{info()} --- return the meta-information of the page, as
33 a dictionary-like object
Moshe Zadka8a18e992001-03-01 08:40:42 +000034\end{itemize}
35
36Raises \exception{URLError} on errors.
37\end{funcdesc}
38
39\begin{funcdesc}{install_opener}{opener}
Fred Drake399bc8c2001-11-09 03:49:29 +000040Install an \class{OpenerDirector} instance as the default opener.
Moshe Zadka8a18e992001-03-01 08:40:42 +000041The code does not check for a real \class{OpenerDirector}, and any
42class with the appropriate interface will work.
43\end{funcdesc}
44
Fred Drake93c86712001-03-02 20:39:34 +000045\begin{funcdesc}{build_opener}{\optional{handler, \moreargs}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000046Return an \class{OpenerDirector} instance, which chains the
47handlers in the order given. \var{handler}s can be either instances
48of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in
49which case it must be possible to call the constructor without
Fred Drake399bc8c2001-11-09 03:49:29 +000050any parameters). Instances of the following classes will be in
51front of the \var{handler}s, unless the \var{handler}s contain
Moshe Zadka8a18e992001-03-01 08:40:42 +000052them, instances of them or subclasses of them:
53
54\code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
55 HTTPRedirectHandler, FTPHandler, FileHandler}
56
Fred Drake93c86712001-03-02 20:39:34 +000057If the Python installation has SSL support (\function{socket.ssl()}
58exists), \class{HTTPSHandler} will also be added.
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +000059
60Beginning in Python 2.3, a \class{BaseHandler} subclass may also change its
61\var{handler_order} member variable to modify its position in the handlers
62list. Besides \class{ProxyHandler}, which has \var{handler_order} of
63\code{100}, all handlers currently have it set to \code{500}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000064\end{funcdesc}
65
Fred Drake93c86712001-03-02 20:39:34 +000066
67The following exceptions are raised as appropriate:
68
Moshe Zadka8a18e992001-03-01 08:40:42 +000069\begin{excdesc}{URLError}
Fred Drake399bc8c2001-11-09 03:49:29 +000070The handlers raise this exception (or derived exceptions) when they
71run into a problem. It is a subclass of \exception{IOError}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000072\end{excdesc}
73
74\begin{excdesc}{HTTPError}
75A subclass of \exception{URLError}, it can also function as a
Fred Drake93c86712001-03-02 20:39:34 +000076non-exceptional file-like return value (the same thing that
77\function{urlopen()} returns). This is useful when handling exotic
78HTTP errors, such as requests for authentication.
Moshe Zadka8a18e992001-03-01 08:40:42 +000079\end{excdesc}
80
81\begin{excdesc}{GopherError}
82A subclass of \exception{URLError}, this is the error raised by the
83Gopher handler.
84\end{excdesc}
85
Fred Drake93c86712001-03-02 20:39:34 +000086
87The following classes are provided:
88
89\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000090This class is an abstraction of a URL request.
91
Fred Drake399bc8c2001-11-09 03:49:29 +000092\var{url} should be a string which is a valid URL. For a description
Fred Drake93c86712001-03-02 20:39:34 +000093of \var{data} see the \method{add_data()} description.
Moshe Zadka8a18e992001-03-01 08:40:42 +000094\var{headers} should be a dictionary, and will be treated as if
Fred Drake93c86712001-03-02 20:39:34 +000095\method{add_header()} was called with each key and value as arguments.
Moshe Zadka8a18e992001-03-01 08:40:42 +000096\end{classdesc}
97
Fred Drake93c86712001-03-02 20:39:34 +000098\begin{classdesc}{OpenerDirector}{}
99The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s
100chained together. It manages the chaining of handlers, and recovery
101from errors.
102\end{classdesc}
103
104\begin{classdesc}{BaseHandler}{}
105This is the base class for all registered handlers --- and handles only
106the simple mechanics of registration.
107\end{classdesc}
108
109\begin{classdesc}{HTTPDefaultErrorHandler}{}
110A class which defines a default handler for HTTP error responses; all
111responses are turned into \exception{HTTPError} exceptions.
112\end{classdesc}
113
114\begin{classdesc}{HTTPRedirectHandler}{}
115A class to handle redirections.
116\end{classdesc}
117
118\begin{classdesc}{ProxyHandler}{\optional{proxies}}
119Cause requests to go through a proxy.
120If \var{proxies} is given, it must be a dictionary mapping
121protocol names to URLs of proxies.
122The default is to read the list of proxies from the environment
Fred Drake47852462001-05-11 15:46:45 +0000123variables \var{protocol}_proxy.
Fred Drake93c86712001-03-02 20:39:34 +0000124\end{classdesc}
125
126\begin{classdesc}{HTTPPasswordMgr}{}
127Keep a database of
128\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})}
129mappings.
130\end{classdesc}
131
132\begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{}
133Keep a database of
134\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings.
135A realm of \code{None} is considered a catch-all realm, which is searched
136if no other realm fits.
137\end{classdesc}
138
139\begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}}
140This is a mixin class that helps with HTTP authentication, both
141to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000142\var{password_mgr}, if given, should be something that is compatible
143with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
144for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000145\end{classdesc}
146
147\begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}}
148Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000149\var{password_mgr}, if given, should be something that is compatible
150with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
151for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000152\end{classdesc}
153
154\begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}}
155Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000156\var{password_mgr}, if given, should be something that is compatible
157with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
158for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000159\end{classdesc}
160
161\begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000162This is a mixin class that helps with HTTP authentication, both
Fred Drake93c86712001-03-02 20:39:34 +0000163to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000164\var{password_mgr}, if given, should be something that is compatible
165with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
166for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000167\end{classdesc}
168
169\begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}}
170Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000171\var{password_mgr}, if given, should be something that is compatible
172with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
173for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000174\end{classdesc}
175
176\begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}}
177Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000178\var{password_mgr}, if given, should be something that is compatible
179with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
180for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000181\end{classdesc}
182
183\begin{classdesc}{HTTPHandler}{}
184A class to handle opening of HTTP URLs.
185\end{classdesc}
186
187\begin{classdesc}{HTTPSHandler}{}
188A class to handle opening of HTTPS URLs.
189\end{classdesc}
190
191\begin{classdesc}{FileHandler}{}
192Open local files.
193\end{classdesc}
194
195\begin{classdesc}{FTPHandler}{}
196Open FTP URLs.
197\end{classdesc}
198
199\begin{classdesc}{CacheFTPHandler}{}
200Open FTP URLs, keeping a cache of open FTP connections to minimize
201delays.
202\end{classdesc}
203
204\begin{classdesc}{GopherHandler}{}
205Open gopher URLs.
206\end{classdesc}
207
208\begin{classdesc}{UnknownHandler}{}
209A catch-all class to handle unknown URLs.
210\end{classdesc}
211
212
213\subsection{Request Objects \label{request-objects}}
214
Moshe Zadka8a18e992001-03-01 08:40:42 +0000215The following methods describe all of \class{Request}'s public interface,
216and so all must be overridden in subclasses.
217
218\begin{methoddesc}[Request]{add_data}{data}
Fred Drake399bc8c2001-11-09 03:49:29 +0000219Set the \class{Request} data to \var{data}. This is ignored
Moshe Zadka8a18e992001-03-01 08:40:42 +0000220by all handlers except HTTP handlers --- and there it should be an
Fred Drake93c86712001-03-02 20:39:34 +0000221\mimetype{application/x-www-form-urlencoded} buffer, and will change the
Fred Drake399bc8c2001-11-09 03:49:29 +0000222request to be \code{POST} rather than \code{GET}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000223\end{methoddesc}
224
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000225\begin{methoddesc}[Request]{get_method}{}
226Return a string indicating the HTTP request method. This is only
227meaningful for HTTP requests, and currently always takes one of the
228values \code{'GET'} or \code{'POST'}.
229\end{methoddesc}
230
Fred Drake399bc8c2001-11-09 03:49:29 +0000231\begin{methoddesc}[Request]{has_data}{}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000232Return whether the instance has a non-\code{None} data.
233\end{methoddesc}
234
Fred Drake399bc8c2001-11-09 03:49:29 +0000235\begin{methoddesc}[Request]{get_data}{}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000236Return the instance's data.
237\end{methoddesc}
238
239\begin{methoddesc}[Request]{add_header}{key, val}
Fred Drake93c86712001-03-02 20:39:34 +0000240Add another header to the request. Headers are currently ignored by
241all handlers except HTTP handlers, where they are added to the list
Fred Drake399bc8c2001-11-09 03:49:29 +0000242of headers sent to the server. Note that there cannot be more than
Fred Drake93c86712001-03-02 20:39:34 +0000243one header with the same name, and later calls will overwrite
244previous calls in case the \var{key} collides. Currently, this is
245no loss of HTTP functionality, since all headers which have meaning
Fred Drake399bc8c2001-11-09 03:49:29 +0000246when used more than once have a (header-specific) way of gaining the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000247same functionality using only one header.
248\end{methoddesc}
249
250\begin{methoddesc}[Request]{get_full_url}{}
251Return the URL given in the constructor.
252\end{methoddesc}
253
254\begin{methoddesc}[Request]{get_type}{}
Fred Drake93c86712001-03-02 20:39:34 +0000255Return the type of the URL --- also known as the scheme.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000256\end{methoddesc}
257
258\begin{methoddesc}[Request]{get_host}{}
Fred Drake399bc8c2001-11-09 03:49:29 +0000259Return the host to which a connection will be made.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000260\end{methoddesc}
261
262\begin{methoddesc}[Request]{get_selector}{}
263Return the selector --- the part of the URL that is sent to
264the server.
265\end{methoddesc}
266
267\begin{methoddesc}[Request]{set_proxy}{host, type}
Fred Drake399bc8c2001-11-09 03:49:29 +0000268Prepare the request by connecting to a proxy server. The \var{host}
269and \var{type} will replace those of the instance, and the instance's
Fred Drake93c86712001-03-02 20:39:34 +0000270selector will be the original URL given in the constructor.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000271\end{methoddesc}
272
Fred Drake93c86712001-03-02 20:39:34 +0000273
274\subsection{OpenerDirector Objects \label{opener-director-objects}}
275
276\class{OpenerDirector} instances have the following methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000277
278\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
Fred Drake93c86712001-03-02 20:39:34 +0000279\var{handler} should be an instance of \class{BaseHandler}. The
280following methods are searched, and added to the possible chains.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000281
282\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +0000283 \item \method{\var{protocol}_open()} ---
284 signal that the handler knows how to open \var{protocol} URLs.
285 \item \method{\var{protocol}_error_\var{type}()} ---
286 signal that the handler knows how to handle \var{type} errors from
287 \var{protocol}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000288\end{itemize}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000289\end{methoddesc}
290
291\begin{methoddesc}[OpenerDirector]{close}{}
292Explicitly break cycles, and delete all the handlers.
293Because the \class{OpenerDirector} needs to know the registered handlers,
294and a handler needs to know which \class{OpenerDirector} called it,
Fred Drake399bc8c2001-11-09 03:49:29 +0000295there is a reference cycle. Even though recent versions of Python
Moshe Zadka8a18e992001-03-01 08:40:42 +0000296have cycle-collection, it is sometimes preferable to explicitly break
297the cycles.
298\end{methoddesc}
299
300\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000301Open the given \var{url} (which can be a request object or a string),
Moshe Zadka8a18e992001-03-01 08:40:42 +0000302optionally passing the given \var{data}.
303Arguments, return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000304of \function{urlopen()} (which simply calls the \method{open()} method
Raymond Hettinger0dfd7a92003-05-10 07:40:56 +0000305on the default installed \class{OpenerDirector}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000306\end{methoddesc}
307
Fred Drake93c86712001-03-02 20:39:34 +0000308\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
309 arg\optional{, \moreargs}}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000310Handle an error in a given protocol. This will call the registered
311error handlers for the given protocol with the given arguments (which
312are protocol specific). The HTTP protocol is a special case which
313uses the HTTP response code to determine the specific error handler;
314refer to the \method{http_error_*()} methods of the handler classes.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000315
316Return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000317of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000318\end{methoddesc}
319
Fred Drake93c86712001-03-02 20:39:34 +0000320
321\subsection{BaseHandler Objects \label{base-handler-objects}}
322
323\class{BaseHandler} objects provide a couple of methods that are
324directly useful, and others that are meant to be used by derived
325classes. These are intended for direct use:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000326
327\begin{methoddesc}[BaseHandler]{add_parent}{director}
328Add a director as parent.
329\end{methoddesc}
330
331\begin{methoddesc}[BaseHandler]{close}{}
332Remove any parents.
333\end{methoddesc}
334
Fred Drake399bc8c2001-11-09 03:49:29 +0000335The following members and methods should only be used by classes
Fred Drake93c86712001-03-02 20:39:34 +0000336derived from \class{BaseHandler}:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000337
338\begin{memberdesc}[BaseHandler]{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000339A valid \class{OpenerDirector}, which can be used to open using a
340different protocol, or handle errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000341\end{memberdesc}
342
343\begin{methoddesc}[BaseHandler]{default_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000344This method is \emph{not} defined in \class{BaseHandler}, but
345subclasses should define it if they want to catch all URLs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000346
Fred Drake399bc8c2001-11-09 03:49:29 +0000347This method, if implemented, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000348\class{OpenerDirector}. It should return a file-like object as
349described in the return value of the \method{open()} of
Fred Drake399bc8c2001-11-09 03:49:29 +0000350\class{OpenerDirector}, or \code{None}. It should raise
Fred Drake93c86712001-03-02 20:39:34 +0000351\exception{URLError}, unless a truly exceptional thing happens (for
352example, \exception{MemoryError} should not be mapped to
Fred Drake399bc8c2001-11-09 03:49:29 +0000353\exception{URLError}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000354
355This method will be called before any protocol-specific open method.
356\end{methoddesc}
357
Fred Drake47852462001-05-11 15:46:45 +0000358\begin{methoddescni}[BaseHandler]{\var{protocol}_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000359This method is \emph{not} defined in \class{BaseHandler}, but
360subclasses should define it if they want to handle URLs with the given
361protocol.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000362
Fred Drake399bc8c2001-11-09 03:49:29 +0000363This method, if defined, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000364\class{OpenerDirector}. Return values should be the same as for
365\method{default_open()}.
Fred Drake47852462001-05-11 15:46:45 +0000366\end{methoddescni}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000367
368\begin{methoddesc}[BaseHandler]{unknown_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000369This method is \emph{not} defined in \class{BaseHandler}, but
370subclasses should define it if they want to catch all URLs with no
Fred Drake399bc8c2001-11-09 03:49:29 +0000371specific registered handler to open it.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000372
Fred Drake399bc8c2001-11-09 03:49:29 +0000373This method, if implemented, will be called by the \member{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000374\class{OpenerDirector}. Return values should be the same as for
375\method{default_open()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000376\end{methoddesc}
377
378\begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000379This method is \emph{not} defined in \class{BaseHandler}, but
380subclasses should override it if they intend to provide a catch-all
381for otherwise unhandled HTTP errors. It will be called automatically
382by the \class{OpenerDirector} getting the error, and should not
383normally be called in other circumstances.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000384
Fred Drake93c86712001-03-02 20:39:34 +0000385\var{req} will be a \class{Request} object, \var{fp} will be a
386file-like object with the HTTP error body, \var{code} will be the
387three-digit code of the error, \var{msg} will be the user-visible
388explanation of the code and \var{hdrs} will be a mapping object with
389the headers of the error.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000390
391Return values and exceptions raised should be the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000392of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000393\end{methoddesc}
394
Fred Drake93c86712001-03-02 20:39:34 +0000395\begin{methoddesc}[BaseHandler]{http_error_\var{nnn}}{req, fp, code, msg, hdrs}
396\var{nnn} should be a three-digit HTTP error code. This method is
397also not defined in \class{BaseHandler}, but will be called, if it
398exists, on an instance of a subclass, when an HTTP error with code
399\var{nnn} occurs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000400
Fred Drake93c86712001-03-02 20:39:34 +0000401Subclasses should override this method to handle specific HTTP
402errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000403
Fred Drake93c86712001-03-02 20:39:34 +0000404Arguments, return values and exceptions raised should be the same as
405for \method{http_error_default()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000406\end{methoddesc}
407
Fred Drake93c86712001-03-02 20:39:34 +0000408\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000409
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000410\note{Some HTTP redirections require action from this module's client
411 code. If this is the case, \exception{HTTPError} is raised. See
412 \rfc{2616} for details of the precise meanings of the various
413 redirection codes.}
414
415\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
416 fp, code, msg, hdrs}
417Return a \class{Request} or \code{None} in response to a redirect.
418This is called by the default implementations of the
419\code{http_error_30x()} methods when a redirection is received from
420the server. If a redirection should take place, return a new
421\class{Request} to allow \code{http_error_30x()} to perform the
422redirect. Otherwise, raise \exception{HTTPError} if no other
423\class{Handler} should try to handle this URL, or return \code{None}
424if you can't but another \class{Handler} might.
425
426\note{The default implementation of this method does not strictly
Martin v. Löwis162f0812003-07-12 07:33:32 +0000427 follow \rfc{2616}, which says that 301 and 302 responses to POST
428 requests must not be automatically redirected without confirmation by
429 the user. In reality, browsers do allow automatic redirection of
430 these responses, changing the POST to a GET, and the default
431 implementation reproduces this behaviour.}
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000432
433\end{methoddesc}
434
Moshe Zadka8a18e992001-03-01 08:40:42 +0000435
Fred Drake93c86712001-03-02 20:39:34 +0000436\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
437 fp, code, msg, hdrs}
438Redirect to the \code{Location:} URL. This method is called by
439the parent \class{OpenerDirector} when getting an HTTP
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000440`moved permanently' response.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000441\end{methoddesc}
442
Fred Drake93c86712001-03-02 20:39:34 +0000443\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
444 fp, code, msg, hdrs}
445The same as \method{http_error_301()}, but called for the
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000446`found' response.
Fred Drake93c86712001-03-02 20:39:34 +0000447\end{methoddesc}
448
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000449\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
450 fp, code, msg, hdrs}
451The same as \method{http_error_301()}, but called for the
Martin v. Löwis162f0812003-07-12 07:33:32 +0000452`see other' response.
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000453\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000454
Martin v. Löwis162f0812003-07-12 07:33:32 +0000455\begin{methoddesc}[HTTPRedirectHandler]{http_error_307}{req,
456 fp, code, msg, hdrs}
457The same as \method{http_error_301()}, but called for the
458`temporary redirect' response.
Fred Drake9753ae12003-07-14 20:53:57 +0000459\end{methoddesc}
460
Martin v. Löwis162f0812003-07-12 07:33:32 +0000461
Fred Drake93c86712001-03-02 20:39:34 +0000462\subsection{ProxyHandler Objects \label{proxy-handler}}
463
Fred Drake47852462001-05-11 15:46:45 +0000464\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
Fred Drake93c86712001-03-02 20:39:34 +0000465The \class{ProxyHandler} will have a method
466\method{\var{protocol}_open()} for every \var{protocol} which has a
467proxy in the \var{proxies} dictionary given in the constructor. The
468method will modify requests to go through the proxy, by calling
469\code{request.set_proxy()}, and call the next handler in the chain to
470actually execute the protocol.
Fred Drake47852462001-05-11 15:46:45 +0000471\end{methoddescni}
Fred Drake93c86712001-03-02 20:39:34 +0000472
473
474\subsection{HTTPPasswordMgr Objects \label{http-password-mgr}}
475
476These methods are available on \class{HTTPPasswordMgr} and
477\class{HTTPPasswordMgrWithDefaultRealm} objects.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000478
479\begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd}
480\var{uri} can be either a single URI, or a sequence of URIs. \var{realm},
481\var{user} and \var{passwd} must be strings. This causes
Fred Drake93c86712001-03-02 20:39:34 +0000482\code{(\var{user}, \var{passwd})} to be used as authentication tokens
Moshe Zadka8a18e992001-03-01 08:40:42 +0000483when authentication for \var{realm} and a super-URI of any of the
484given URIs is given.
485\end{methoddesc}
486
487\begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri}
Fred Drake93c86712001-03-02 20:39:34 +0000488Get user/password for given realm and URI, if any. This method will
489return \code{(None, None)} if there is no matching user/password.
490
491For \class{HTTPPasswordMgrWithDefaultRealm} objects, the realm
492\code{None} will be searched if the given \var{realm} has no matching
493user/password.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000494\end{methoddesc}
495
Moshe Zadka8a18e992001-03-01 08:40:42 +0000496
Fred Drake93c86712001-03-02 20:39:34 +0000497\subsection{AbstractBasicAuthHandler Objects
498 \label{abstract-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000499
500\begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request}
501 {authreq, host, req, headers}
Fred Drake399bc8c2001-11-09 03:49:29 +0000502Handle an authentication request by getting a user/password pair, and
503re-trying the request. \var{authreq} should be the name of the header
504where the information about the realm is included in the request,
505\var{host} is the host to authenticate to, \var{req} should be the
506(failed) \class{Request} object, and \var{headers} should be the error
507headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000508\end{methoddesc}
509
Fred Drake93c86712001-03-02 20:39:34 +0000510
511\subsection{HTTPBasicAuthHandler Objects
512 \label{http-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000513
514\begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code,
515 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000516Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000517\end{methoddesc}
518
Fred Drake93c86712001-03-02 20:39:34 +0000519
520\subsection{ProxyBasicAuthHandler Objects
521 \label{proxy-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000522
523\begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code,
524 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000525Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000526\end{methoddesc}
527
Moshe Zadka8a18e992001-03-01 08:40:42 +0000528
Fred Drake93c86712001-03-02 20:39:34 +0000529\subsection{AbstractDigestAuthHandler Objects
530 \label{abstract-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000531
Fred Drake93c86712001-03-02 20:39:34 +0000532\begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000533 {authreq, host, req, headers}
534\var{authreq} should be the name of the header where the information about
Fred Drake399bc8c2001-11-09 03:49:29 +0000535the realm is included in the request, \var{host} should be the host to
536authenticate to, \var{req} should be the (failed) \class{Request}
537object, and \var{headers} should be the error headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000538\end{methoddesc}
539
Fred Drake93c86712001-03-02 20:39:34 +0000540
541\subsection{HTTPDigestAuthHandler Objects
542 \label{http-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000543
544\begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code,
545 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000546Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000547\end{methoddesc}
548
Fred Drake93c86712001-03-02 20:39:34 +0000549
550\subsection{ProxyDigestAuthHandler Objects
551 \label{proxy-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000552
553\begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code,
554 msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000555Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000556\end{methoddesc}
557
Fred Drake93c86712001-03-02 20:39:34 +0000558
559\subsection{HTTPHandler Objects \label{http-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000560
561\begin{methoddesc}[HTTPHandler]{http_open}{req}
Fred Drake399bc8c2001-11-09 03:49:29 +0000562Send an HTTP request, which can be either GET or POST, depending on
Fred Drake93c86712001-03-02 20:39:34 +0000563\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000564\end{methoddesc}
565
Fred Drake93c86712001-03-02 20:39:34 +0000566
567\subsection{HTTPSHandler Objects \label{https-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000568
569\begin{methoddesc}[HTTPSHandler]{https_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000570Send an HTTPS request, which can be either GET or POST, depending on
571\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000572\end{methoddesc}
573
Moshe Zadka8a18e992001-03-01 08:40:42 +0000574
Fred Drake93c86712001-03-02 20:39:34 +0000575\subsection{FileHandler Objects \label{file-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000576
577\begin{methoddesc}[FileHandler]{file_open}{req}
578Open the file locally, if there is no host name, or
Fred Drake93c86712001-03-02 20:39:34 +0000579the host name is \code{'localhost'}. Change the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000580protocol to \code{ftp} otherwise, and retry opening
581it using \member{parent}.
582\end{methoddesc}
583
Fred Drake93c86712001-03-02 20:39:34 +0000584
585\subsection{FTPHandler Objects \label{ftp-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000586
587\begin{methoddesc}[FTPHandler]{ftp_open}{req}
588Open the FTP file indicated by \var{req}.
589The login is always done with empty username and password.
590\end{methoddesc}
591
Moshe Zadka8a18e992001-03-01 08:40:42 +0000592
Fred Drake93c86712001-03-02 20:39:34 +0000593\subsection{CacheFTPHandler Objects \label{cacheftp-handler-objects}}
594
595\class{CacheFTPHandler} objects are \class{FTPHandler} objects with
596the following additional methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000597
598\begin{methoddesc}[CacheFTPHandler]{setTimeout}{t}
599Set timeout of connections to \var{t} seconds.
600\end{methoddesc}
601
602\begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m}
603Set maximum number of cached connections to \var{m}.
604\end{methoddesc}
605
Fred Drake93c86712001-03-02 20:39:34 +0000606
607\subsection{GopherHandler Objects \label{gopher-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000608
609\begin{methoddesc}[GopherHandler]{gopher_open}{req}
610Open the gopher resource indicated by \var{req}.
611\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000612
613
614\subsection{UnknownHandler Objects \label{unknown-handler-objects}}
615
Fred Drakea9399112001-07-05 21:14:03 +0000616\begin{methoddesc}[UnknownHandler]{unknown_open}{}
Fred Drake93c86712001-03-02 20:39:34 +0000617Raise a \exception{URLError} exception.
618\end{methoddesc}
Fred Drake53e5b712003-04-25 15:27:33 +0000619
620
621\subsection{Examples \label{urllib2-examples}}
622
623This example gets the python.org main page and displays the first 100
624bytes of it:
625
626\begin{verbatim}
627>>> import urllib2
628>>> f = urllib2.urlopen('http://www.python.org/')
629>>> print f.read(100)
630<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
631<?xml-stylesheet href="./css/ht2html
632\end{verbatim}
633
634Here we are sending a data-stream to the stdin of a CGI and reading
635the data it returns to us:
636
637\begin{verbatim}
638>>> import urllib2
639>>> req = urllib2.Request(url='https://localhost/cgi-bin/test.cgi',
640... data='This data is passed to stdin of the CGI')
641>>> f = urllib2.urlopen(req)
642>>> print f.read()
643Got Data: "This data is passed to stdin of the CGI"
644\end{verbatim}
645
646The code for the sample CGI used in the above example is:
647
648\begin{verbatim}
649#!/usr/bin/env python
650import sys
651data = sys.stdin.read()
652print 'Content-type: text/plain\n\nGot Data: "%s"' % \
653data
654\end{verbatim}