blob: f88fd256dc99b948f240d46d22e627e9f2f583dd [file] [log] [blame]
Moshe Zadka8a18e992001-03-01 08:40:42 +00001\section{\module{urllib2} ---
2 extensible library for opening URLs}
3
4\declaremodule{standard}{urllib2}
Moshe Zadka8a18e992001-03-01 08:40:42 +00005\moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net}
6\sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net}
7
8\modulesynopsis{An extensible library for opening URLs using a variety of
9 protocols}
10
11The \module{urllib2} module defines functions and classes which help
Fred Drake93c86712001-03-02 20:39:34 +000012in opening URLs (mostly HTTP) in a complex world --- basic and digest
Moshe Zadka8a18e992001-03-01 08:40:42 +000013authentication, redirections and more.
14
15The \module{urllib2} module defines the following functions:
16
17\begin{funcdesc}{urlopen}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +000018Open the URL \var{url}, which can be either a string or a \class{Request}
Moshe Zadka8a18e992001-03-01 08:40:42 +000019object (currently the code checks that it really is a \class{Request}
Fred Drake93c86712001-03-02 20:39:34 +000020instance, or an instance of a subclass of \class{Request}).
Moshe Zadka8a18e992001-03-01 08:40:42 +000021
22\var{data} should be a string, which specifies additional data to
23send to the server. In HTTP requests, which are the only ones that
24support \var{data}, it should be a buffer in the format of
Fred Drake93c86712001-03-02 20:39:34 +000025\mimetype{application/x-www-form-urlencoded}, for example one returned
26from \function{urllib.urlencode()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000027
28This function returns a file-like object with two additional methods:
29
30\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +000031 \item \method{geturl()} --- return the URL of the resource retrieved
32 \item \method{info()} --- return the meta-information of the page, as
33 a dictionary-like object
Moshe Zadka8a18e992001-03-01 08:40:42 +000034\end{itemize}
35
36Raises \exception{URLError} on errors.
37\end{funcdesc}
38
39\begin{funcdesc}{install_opener}{opener}
Fred Drake399bc8c2001-11-09 03:49:29 +000040Install an \class{OpenerDirector} instance as the default opener.
Moshe Zadka8a18e992001-03-01 08:40:42 +000041The code does not check for a real \class{OpenerDirector}, and any
42class with the appropriate interface will work.
43\end{funcdesc}
44
Fred Drake93c86712001-03-02 20:39:34 +000045\begin{funcdesc}{build_opener}{\optional{handler, \moreargs}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000046Return an \class{OpenerDirector} instance, which chains the
47handlers in the order given. \var{handler}s can be either instances
48of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in
49which case it must be possible to call the constructor without
Fred Drake399bc8c2001-11-09 03:49:29 +000050any parameters). Instances of the following classes will be in
51front of the \var{handler}s, unless the \var{handler}s contain
Moshe Zadka8a18e992001-03-01 08:40:42 +000052them, instances of them or subclasses of them:
53
54\code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
55 HTTPRedirectHandler, FTPHandler, FileHandler}
56
Fred Drake93c86712001-03-02 20:39:34 +000057If the Python installation has SSL support (\function{socket.ssl()}
58exists), \class{HTTPSHandler} will also be added.
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +000059
60Beginning in Python 2.3, a \class{BaseHandler} subclass may also change its
61\var{handler_order} member variable to modify its position in the handlers
list. Except for \class{ProxyHandler}, which has a \var{handler_order} of
63\code{100}, all handlers currently have it set to \code{500}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000064\end{funcdesc}
65
Fred Drake93c86712001-03-02 20:39:34 +000066
67The following exceptions are raised as appropriate:
68
Moshe Zadka8a18e992001-03-01 08:40:42 +000069\begin{excdesc}{URLError}
Fred Drake399bc8c2001-11-09 03:49:29 +000070The handlers raise this exception (or derived exceptions) when they
71run into a problem. It is a subclass of \exception{IOError}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000072\end{excdesc}
73
74\begin{excdesc}{HTTPError}
75A subclass of \exception{URLError}, it can also function as a
Fred Drake93c86712001-03-02 20:39:34 +000076non-exceptional file-like return value (the same thing that
77\function{urlopen()} returns). This is useful when handling exotic
78HTTP errors, such as requests for authentication.
Moshe Zadka8a18e992001-03-01 08:40:42 +000079\end{excdesc}
80
81\begin{excdesc}{GopherError}
82A subclass of \exception{URLError}, this is the error raised by the
83Gopher handler.
84\end{excdesc}
85
Fred Drake93c86712001-03-02 20:39:34 +000086
87The following classes are provided:
88
89\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000090This class is an abstraction of a URL request.
91
Fred Drake399bc8c2001-11-09 03:49:29 +000092\var{url} should be a string which is a valid URL. For a description
Fred Drake93c86712001-03-02 20:39:34 +000093of \var{data} see the \method{add_data()} description.
Moshe Zadka8a18e992001-03-01 08:40:42 +000094\var{headers} should be a dictionary, and will be treated as if
Fred Drake93c86712001-03-02 20:39:34 +000095\method{add_header()} was called with each key and value as arguments.
Moshe Zadka8a18e992001-03-01 08:40:42 +000096\end{classdesc}
97
Fred Drake93c86712001-03-02 20:39:34 +000098\begin{classdesc}{OpenerDirector}{}
99The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s
100chained together. It manages the chaining of handlers, and recovery
101from errors.
102\end{classdesc}
103
104\begin{classdesc}{BaseHandler}{}
105This is the base class for all registered handlers --- and handles only
106the simple mechanics of registration.
107\end{classdesc}
108
109\begin{classdesc}{HTTPDefaultErrorHandler}{}
110A class which defines a default handler for HTTP error responses; all
111responses are turned into \exception{HTTPError} exceptions.
112\end{classdesc}
113
114\begin{classdesc}{HTTPRedirectHandler}{}
115A class to handle redirections.
116\end{classdesc}
117
118\begin{classdesc}{ProxyHandler}{\optional{proxies}}
119Cause requests to go through a proxy.
120If \var{proxies} is given, it must be a dictionary mapping
121protocol names to URLs of proxies.
122The default is to read the list of proxies from the environment
Fred Drake47852462001-05-11 15:46:45 +0000123variables \var{protocol}_proxy.
Fred Drake93c86712001-03-02 20:39:34 +0000124\end{classdesc}
125
126\begin{classdesc}{HTTPPasswordMgr}{}
127Keep a database of
128\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})}
129mappings.
130\end{classdesc}
131
132\begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{}
133Keep a database of
134\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings.
135A realm of \code{None} is considered a catch-all realm, which is searched
136if no other realm fits.
137\end{classdesc}
138
139\begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}}
140This is a mixin class that helps with HTTP authentication, both
141to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000142\var{password_mgr}, if given, should be something that is compatible
143with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
144for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000145\end{classdesc}
146
147\begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}}
148Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000149\var{password_mgr}, if given, should be something that is compatible
150with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
151for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000152\end{classdesc}
153
154\begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}}
155Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000156\var{password_mgr}, if given, should be something that is compatible
157with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
158for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000159\end{classdesc}
160
161\begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000162This is a mixin class that helps with HTTP authentication, both
Fred Drake93c86712001-03-02 20:39:34 +0000163to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000164\var{password_mgr}, if given, should be something that is compatible
165with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
166for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000167\end{classdesc}
168
169\begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}}
170Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000171\var{password_mgr}, if given, should be something that is compatible
172with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
173for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000174\end{classdesc}
175
176\begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}}
177Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000178\var{password_mgr}, if given, should be something that is compatible
179with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
180for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000181\end{classdesc}
182
183\begin{classdesc}{HTTPHandler}{}
184A class to handle opening of HTTP URLs.
185\end{classdesc}
186
187\begin{classdesc}{HTTPSHandler}{}
188A class to handle opening of HTTPS URLs.
189\end{classdesc}
190
191\begin{classdesc}{FileHandler}{}
192Open local files.
193\end{classdesc}
194
195\begin{classdesc}{FTPHandler}{}
196Open FTP URLs.
197\end{classdesc}
198
199\begin{classdesc}{CacheFTPHandler}{}
200Open FTP URLs, keeping a cache of open FTP connections to minimize
201delays.
202\end{classdesc}
203
204\begin{classdesc}{GopherHandler}{}
205Open gopher URLs.
206\end{classdesc}
207
208\begin{classdesc}{UnknownHandler}{}
209A catch-all class to handle unknown URLs.
210\end{classdesc}
211
212
213\subsection{Request Objects \label{request-objects}}
214
Moshe Zadka8a18e992001-03-01 08:40:42 +0000215The following methods describe all of \class{Request}'s public interface,
216and so all must be overridden in subclasses.
217
218\begin{methoddesc}[Request]{add_data}{data}
Fred Drake399bc8c2001-11-09 03:49:29 +0000219Set the \class{Request} data to \var{data}. This is ignored
Moshe Zadka8a18e992001-03-01 08:40:42 +0000220by all handlers except HTTP handlers --- and there it should be an
\mimetype{application/x-www-form-urlencoded} buffer, and will change the
Fred Drake399bc8c2001-11-09 03:49:29 +0000222request to be \code{POST} rather than \code{GET}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000223\end{methoddesc}
224
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000225\begin{methoddesc}[Request]{get_method}{}
226Return a string indicating the HTTP request method. This is only
227meaningful for HTTP requests, and currently always takes one of the
values \code{'GET'} or \code{'POST'}.
229\end{methoddesc}
230
Fred Drake399bc8c2001-11-09 03:49:29 +0000231\begin{methoddesc}[Request]{has_data}{}
Return whether the instance has data other than \code{None}.
233\end{methoddesc}
234
Fred Drake399bc8c2001-11-09 03:49:29 +0000235\begin{methoddesc}[Request]{get_data}{}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000236Return the instance's data.
237\end{methoddesc}
238
239\begin{methoddesc}[Request]{add_header}{key, val}
Fred Drake93c86712001-03-02 20:39:34 +0000240Add another header to the request. Headers are currently ignored by
241all handlers except HTTP handlers, where they are added to the list
Fred Drake399bc8c2001-11-09 03:49:29 +0000242of headers sent to the server. Note that there cannot be more than
Fred Drake93c86712001-03-02 20:39:34 +0000243one header with the same name, and later calls will overwrite
244previous calls in case the \var{key} collides. Currently, this is
245no loss of HTTP functionality, since all headers which have meaning
Fred Drake399bc8c2001-11-09 03:49:29 +0000246when used more than once have a (header-specific) way of gaining the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000247same functionality using only one header.
248\end{methoddesc}
249
250\begin{methoddesc}[Request]{get_full_url}{}
251Return the URL given in the constructor.
252\end{methoddesc}
253
254\begin{methoddesc}[Request]{get_type}{}
Fred Drake93c86712001-03-02 20:39:34 +0000255Return the type of the URL --- also known as the scheme.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000256\end{methoddesc}
257
258\begin{methoddesc}[Request]{get_host}{}
Fred Drake399bc8c2001-11-09 03:49:29 +0000259Return the host to which a connection will be made.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000260\end{methoddesc}
261
262\begin{methoddesc}[Request]{get_selector}{}
263Return the selector --- the part of the URL that is sent to
264the server.
265\end{methoddesc}
266
267\begin{methoddesc}[Request]{set_proxy}{host, type}
Fred Drake399bc8c2001-11-09 03:49:29 +0000268Prepare the request by connecting to a proxy server. The \var{host}
269and \var{type} will replace those of the instance, and the instance's
Fred Drake93c86712001-03-02 20:39:34 +0000270selector will be the original URL given in the constructor.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000271\end{methoddesc}
272
Fred Drake93c86712001-03-02 20:39:34 +0000273
274\subsection{OpenerDirector Objects \label{opener-director-objects}}
275
276\class{OpenerDirector} instances have the following methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000277
278\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
Fred Drake93c86712001-03-02 20:39:34 +0000279\var{handler} should be an instance of \class{BaseHandler}. The
280following methods are searched, and added to the possible chains.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000281
282\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +0000283 \item \method{\var{protocol}_open()} ---
284 signal that the handler knows how to open \var{protocol} URLs.
285 \item \method{\var{protocol}_error_\var{type}()} ---
286 signal that the handler knows how to handle \var{type} errors from
287 \var{protocol}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000288\end{itemize}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000289\end{methoddesc}
290
291\begin{methoddesc}[OpenerDirector]{close}{}
292Explicitly break cycles, and delete all the handlers.
293Because the \class{OpenerDirector} needs to know the registered handlers,
and a handler needs to know which \class{OpenerDirector} called it,
there is a reference cycle. Even though recent versions of Python
Moshe Zadka8a18e992001-03-01 08:40:42 +0000296have cycle-collection, it is sometimes preferable to explicitly break
297the cycles.
298\end{methoddesc}
299
300\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000301Open the given \var{url} (which can be a request object or a string),
Moshe Zadka8a18e992001-03-01 08:40:42 +0000302optionally passing the given \var{data}.
303Arguments, return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000304of \function{urlopen()} (which simply calls the \method{open()} method
Raymond Hettinger0dfd7a92003-05-10 07:40:56 +0000305on the default installed \class{OpenerDirector}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000306\end{methoddesc}
307
Fred Drake93c86712001-03-02 20:39:34 +0000308\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
309 arg\optional{, \moreargs}}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000310Handle an error in a given protocol. This will call the registered
311error handlers for the given protocol with the given arguments (which
312are protocol specific). The HTTP protocol is a special case which
313uses the HTTP response code to determine the specific error handler;
314refer to the \method{http_error_*()} methods of the handler classes.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000315
316Return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000317of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000318\end{methoddesc}
319
Fred Drake93c86712001-03-02 20:39:34 +0000320
321\subsection{BaseHandler Objects \label{base-handler-objects}}
322
323\class{BaseHandler} objects provide a couple of methods that are
324directly useful, and others that are meant to be used by derived
325classes. These are intended for direct use:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000326
327\begin{methoddesc}[BaseHandler]{add_parent}{director}
328Add a director as parent.
329\end{methoddesc}
330
331\begin{methoddesc}[BaseHandler]{close}{}
332Remove any parents.
333\end{methoddesc}
334
Fred Drake399bc8c2001-11-09 03:49:29 +0000335The following members and methods should only be used by classes
Fred Drake93c86712001-03-02 20:39:34 +0000336derived from \class{BaseHandler}:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000337
338\begin{memberdesc}[BaseHandler]{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000339A valid \class{OpenerDirector}, which can be used to open using a
340different protocol, or handle errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000341\end{memberdesc}
342
343\begin{methoddesc}[BaseHandler]{default_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000344This method is \emph{not} defined in \class{BaseHandler}, but
345subclasses should define it if they want to catch all URLs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000346
Fred Drake399bc8c2001-11-09 03:49:29 +0000347This method, if implemented, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000348\class{OpenerDirector}. It should return a file-like object as
349described in the return value of the \method{open()} of
Fred Drake399bc8c2001-11-09 03:49:29 +0000350\class{OpenerDirector}, or \code{None}. It should raise
Fred Drake93c86712001-03-02 20:39:34 +0000351\exception{URLError}, unless a truly exceptional thing happens (for
352example, \exception{MemoryError} should not be mapped to
Fred Drake399bc8c2001-11-09 03:49:29 +0000353\exception{URLError}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000354
355This method will be called before any protocol-specific open method.
356\end{methoddesc}
357
Fred Drake47852462001-05-11 15:46:45 +0000358\begin{methoddescni}[BaseHandler]{\var{protocol}_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000359This method is \emph{not} defined in \class{BaseHandler}, but
360subclasses should define it if they want to handle URLs with the given
361protocol.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000362
Fred Drake399bc8c2001-11-09 03:49:29 +0000363This method, if defined, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000364\class{OpenerDirector}. Return values should be the same as for
365\method{default_open()}.
Fred Drake47852462001-05-11 15:46:45 +0000366\end{methoddescni}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000367
368\begin{methoddesc}[BaseHandler]{unknown_open}{req}
This method is \emph{not} defined in \class{BaseHandler}, but
370subclasses should define it if they want to catch all URLs with no
Fred Drake399bc8c2001-11-09 03:49:29 +0000371specific registered handler to open it.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000372
Fred Drake399bc8c2001-11-09 03:49:29 +0000373This method, if implemented, will be called by the \member{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000374\class{OpenerDirector}. Return values should be the same as for
375\method{default_open()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000376\end{methoddesc}
377
378\begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000379This method is \emph{not} defined in \class{BaseHandler}, but
380subclasses should override it if they intend to provide a catch-all
381for otherwise unhandled HTTP errors. It will be called automatically
382by the \class{OpenerDirector} getting the error, and should not
383normally be called in other circumstances.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000384
Fred Drake93c86712001-03-02 20:39:34 +0000385\var{req} will be a \class{Request} object, \var{fp} will be a
386file-like object with the HTTP error body, \var{code} will be the
387three-digit code of the error, \var{msg} will be the user-visible
388explanation of the code and \var{hdrs} will be a mapping object with
389the headers of the error.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000390
391Return values and exceptions raised should be the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000392of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000393\end{methoddesc}
394
Fred Drake93c86712001-03-02 20:39:34 +0000395\begin{methoddesc}[BaseHandler]{http_error_\var{nnn}}{req, fp, code, msg, hdrs}
396\var{nnn} should be a three-digit HTTP error code. This method is
397also not defined in \class{BaseHandler}, but will be called, if it
398exists, on an instance of a subclass, when an HTTP error with code
399\var{nnn} occurs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000400
Fred Drake93c86712001-03-02 20:39:34 +0000401Subclasses should override this method to handle specific HTTP
402errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000403
Fred Drake93c86712001-03-02 20:39:34 +0000404Arguments, return values and exceptions raised should be the same as
405for \method{http_error_default()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000406\end{methoddesc}
407
Fred Drake93c86712001-03-02 20:39:34 +0000408\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000409
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000410\note{Some HTTP redirections require action from this module's client
411 code. If this is the case, \exception{HTTPError} is raised. See
412 \rfc{2616} for details of the precise meanings of the various
413 redirection codes.}
414
415\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
416 fp, code, msg, hdrs}
417Return a \class{Request} or \code{None} in response to a redirect.
418This is called by the default implementations of the
419\code{http_error_30x()} methods when a redirection is received from
420the server. If a redirection should take place, return a new
421\class{Request} to allow \code{http_error_30x()} to perform the
422redirect. Otherwise, raise \exception{HTTPError} if no other
423\class{Handler} should try to handle this URL, or return \code{None}
424if you can't but another \class{Handler} might.
425
426\note{The default implementation of this method does not strictly
427 follow \rfc{2616}: it allows automatic 302 redirection of POST
428 requests, because essentially all HTTP clients do this.}
429
430\end{methoddesc}
431
Moshe Zadka8a18e992001-03-01 08:40:42 +0000432
Fred Drake93c86712001-03-02 20:39:34 +0000433\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
434 fp, code, msg, hdrs}
435Redirect to the \code{Location:} URL. This method is called by
436the parent \class{OpenerDirector} when getting an HTTP
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000437`moved permanently' response.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000438\end{methoddesc}
439
Fred Drake93c86712001-03-02 20:39:34 +0000440\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
441 fp, code, msg, hdrs}
442The same as \method{http_error_301()}, but called for the
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000443`found' response.
Fred Drake93c86712001-03-02 20:39:34 +0000444\end{methoddesc}
445
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000446\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
447 fp, code, msg, hdrs}
448The same as \method{http_error_301()}, but called for the
449`see other' redirect response.
450\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000451
452\subsection{ProxyHandler Objects \label{proxy-handler}}
453
Fred Drake47852462001-05-11 15:46:45 +0000454\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
Fred Drake93c86712001-03-02 20:39:34 +0000455The \class{ProxyHandler} will have a method
456\method{\var{protocol}_open()} for every \var{protocol} which has a
457proxy in the \var{proxies} dictionary given in the constructor. The
458method will modify requests to go through the proxy, by calling
459\code{request.set_proxy()}, and call the next handler in the chain to
460actually execute the protocol.
Fred Drake47852462001-05-11 15:46:45 +0000461\end{methoddescni}
Fred Drake93c86712001-03-02 20:39:34 +0000462
463
464\subsection{HTTPPasswordMgr Objects \label{http-password-mgr}}
465
466These methods are available on \class{HTTPPasswordMgr} and
467\class{HTTPPasswordMgrWithDefaultRealm} objects.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000468
469\begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd}
\var{uri} can be either a single URI, or a sequence of URIs. \var{realm},
471\var{user} and \var{passwd} must be strings. This causes
Fred Drake93c86712001-03-02 20:39:34 +0000472\code{(\var{user}, \var{passwd})} to be used as authentication tokens
Moshe Zadka8a18e992001-03-01 08:40:42 +0000473when authentication for \var{realm} and a super-URI of any of the
474given URIs is given.
475\end{methoddesc}
476
477\begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri}
Fred Drake93c86712001-03-02 20:39:34 +0000478Get user/password for given realm and URI, if any. This method will
479return \code{(None, None)} if there is no matching user/password.
480
481For \class{HTTPPasswordMgrWithDefaultRealm} objects, the realm
482\code{None} will be searched if the given \var{realm} has no matching
483user/password.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000484\end{methoddesc}
485
Moshe Zadka8a18e992001-03-01 08:40:42 +0000486
Fred Drake93c86712001-03-02 20:39:34 +0000487\subsection{AbstractBasicAuthHandler Objects
488 \label{abstract-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000489
490\begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request}
491 {authreq, host, req, headers}
Fred Drake399bc8c2001-11-09 03:49:29 +0000492Handle an authentication request by getting a user/password pair, and
493re-trying the request. \var{authreq} should be the name of the header
494where the information about the realm is included in the request,
495\var{host} is the host to authenticate to, \var{req} should be the
496(failed) \class{Request} object, and \var{headers} should be the error
497headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000498\end{methoddesc}
499
Fred Drake93c86712001-03-02 20:39:34 +0000500
501\subsection{HTTPBasicAuthHandler Objects
502 \label{http-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000503
504\begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code,
505 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000506Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000507\end{methoddesc}
508
Fred Drake93c86712001-03-02 20:39:34 +0000509
510\subsection{ProxyBasicAuthHandler Objects
511 \label{proxy-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000512
513\begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code,
514 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000515Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000516\end{methoddesc}
517
Moshe Zadka8a18e992001-03-01 08:40:42 +0000518
Fred Drake93c86712001-03-02 20:39:34 +0000519\subsection{AbstractDigestAuthHandler Objects
520 \label{abstract-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000521
Fred Drake93c86712001-03-02 20:39:34 +0000522\begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000523 {authreq, host, req, headers}
524\var{authreq} should be the name of the header where the information about
Fred Drake399bc8c2001-11-09 03:49:29 +0000525the realm is included in the request, \var{host} should be the host to
526authenticate to, \var{req} should be the (failed) \class{Request}
527object, and \var{headers} should be the error headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000528\end{methoddesc}
529
Fred Drake93c86712001-03-02 20:39:34 +0000530
531\subsection{HTTPDigestAuthHandler Objects
532 \label{http-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000533
534\begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code,
535 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000536Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000537\end{methoddesc}
538
Fred Drake93c86712001-03-02 20:39:34 +0000539
540\subsection{ProxyDigestAuthHandler Objects
541 \label{proxy-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000542
543\begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code,
544 msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000545Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000546\end{methoddesc}
547
Fred Drake93c86712001-03-02 20:39:34 +0000548
549\subsection{HTTPHandler Objects \label{http-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000550
551\begin{methoddesc}[HTTPHandler]{http_open}{req}
Fred Drake399bc8c2001-11-09 03:49:29 +0000552Send an HTTP request, which can be either GET or POST, depending on
Fred Drake93c86712001-03-02 20:39:34 +0000553\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000554\end{methoddesc}
555
Fred Drake93c86712001-03-02 20:39:34 +0000556
557\subsection{HTTPSHandler Objects \label{https-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000558
559\begin{methoddesc}[HTTPSHandler]{https_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000560Send an HTTPS request, which can be either GET or POST, depending on
561\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000562\end{methoddesc}
563
Moshe Zadka8a18e992001-03-01 08:40:42 +0000564
Fred Drake93c86712001-03-02 20:39:34 +0000565\subsection{FileHandler Objects \label{file-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000566
567\begin{methoddesc}[FileHandler]{file_open}{req}
568Open the file locally, if there is no host name, or
Fred Drake93c86712001-03-02 20:39:34 +0000569the host name is \code{'localhost'}. Change the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000570protocol to \code{ftp} otherwise, and retry opening
571it using \member{parent}.
572\end{methoddesc}
573
Fred Drake93c86712001-03-02 20:39:34 +0000574
575\subsection{FTPHandler Objects \label{ftp-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000576
577\begin{methoddesc}[FTPHandler]{ftp_open}{req}
578Open the FTP file indicated by \var{req}.
579The login is always done with empty username and password.
580\end{methoddesc}
581
Moshe Zadka8a18e992001-03-01 08:40:42 +0000582
Fred Drake93c86712001-03-02 20:39:34 +0000583\subsection{CacheFTPHandler Objects \label{cacheftp-handler-objects}}
584
585\class{CacheFTPHandler} objects are \class{FTPHandler} objects with
586the following additional methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000587
588\begin{methoddesc}[CacheFTPHandler]{setTimeout}{t}
589Set timeout of connections to \var{t} seconds.
590\end{methoddesc}
591
592\begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m}
593Set maximum number of cached connections to \var{m}.
594\end{methoddesc}
595
Fred Drake93c86712001-03-02 20:39:34 +0000596
597\subsection{GopherHandler Objects \label{gopher-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000598
599\begin{methoddesc}[GopherHandler]{gopher_open}{req}
600Open the gopher resource indicated by \var{req}.
601\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000602
603
604\subsection{UnknownHandler Objects \label{unknown-handler-objects}}
605
Fred Drakea9399112001-07-05 21:14:03 +0000606\begin{methoddesc}[UnknownHandler]{unknown_open}{}
Fred Drake93c86712001-03-02 20:39:34 +0000607Raise a \exception{URLError} exception.
608\end{methoddesc}
Fred Drake53e5b712003-04-25 15:27:33 +0000609
610
611\subsection{Examples \label{urllib2-examples}}
612
613This example gets the python.org main page and displays the first 100
614bytes of it:
615
616\begin{verbatim}
617>>> import urllib2
618>>> f = urllib2.urlopen('http://www.python.org/')
619>>> print f.read(100)
620<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
621<?xml-stylesheet href="./css/ht2html
622\end{verbatim}
623
624Here we are sending a data-stream to the stdin of a CGI and reading
625the data it returns to us:
626
627\begin{verbatim}
628>>> import urllib2
629>>> req = urllib2.Request(url='https://localhost/cgi-bin/test.cgi',
630... data='This data is passed to stdin of the CGI')
631>>> f = urllib2.urlopen(req)
632>>> print f.read()
633Got Data: "This data is passed to stdin of the CGI"
634\end{verbatim}
635
636The code for the sample CGI used in the above example is:
637
638\begin{verbatim}
639#!/usr/bin/env python
640import sys
641data = sys.stdin.read()
print 'Content-type: text/plain\n\nGot Data: "%s"' % data
644\end{verbatim}