blob: 783105f7a7b7bfc2e457a2326aa80815decdfd6d [file] [log] [blame]
Moshe Zadka8a18e992001-03-01 08:40:42 +00001\section{\module{urllib2} ---
2 extensible library for opening URLs}
3
4\declaremodule{standard}{urllib2}
Moshe Zadka8a18e992001-03-01 08:40:42 +00005\moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net}
6\sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net}
7
8\modulesynopsis{An extensible library for opening URLs using a variety of
9 protocols}
10
11The \module{urllib2} module defines functions and classes which help
Fred Drake93c86712001-03-02 20:39:34 +000012in opening URLs (mostly HTTP) in a complex world --- basic and digest
Moshe Zadka8a18e992001-03-01 08:40:42 +000013authentication, redirections and more.
14
15The \module{urllib2} module defines the following functions:
16
17\begin{funcdesc}{urlopen}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +000018Open the URL \var{url}, which can be either a string or a \class{Request}
Moshe Zadka8a18e992001-03-01 08:40:42 +000019object (currently the code checks that it really is a \class{Request}
Fred Drake93c86712001-03-02 20:39:34 +000020instance, or an instance of a subclass of \class{Request}).
Moshe Zadka8a18e992001-03-01 08:40:42 +000021
22\var{data} should be a string, which specifies additional data to
23send to the server. In HTTP requests, which are the only ones that
24support \var{data}, it should be a buffer in the format of
Fred Drake93c86712001-03-02 20:39:34 +000025\mimetype{application/x-www-form-urlencoded}, for example one returned
26from \function{urllib.urlencode()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000027
28This function returns a file-like object with two additional methods:
29
30\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +000031 \item \method{geturl()} --- return the URL of the resource retrieved
32 \item \method{info()} --- return the meta-information of the page, as
33 a dictionary-like object
Moshe Zadka8a18e992001-03-01 08:40:42 +000034\end{itemize}
35
36Raises \exception{URLError} on errors.
37\end{funcdesc}
38
39\begin{funcdesc}{install_opener}{opener}
Fred Drake399bc8c2001-11-09 03:49:29 +000040Install an \class{OpenerDirector} instance as the default opener.
Moshe Zadka8a18e992001-03-01 08:40:42 +000041The code does not check for a real \class{OpenerDirector}, and any
42class with the appropriate interface will work.
43\end{funcdesc}
44
Fred Drake93c86712001-03-02 20:39:34 +000045\begin{funcdesc}{build_opener}{\optional{handler, \moreargs}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000046Return an \class{OpenerDirector} instance, which chains the
47handlers in the order given. \var{handler}s can be either instances
48of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in
49which case it must be possible to call the constructor without
Fred Drake399bc8c2001-11-09 03:49:29 +000050any parameters). Instances of the following classes will be in
51front of the \var{handler}s, unless the \var{handler}s contain
Moshe Zadka8a18e992001-03-01 08:40:42 +000052them, instances of them or subclasses of them:
53
54\code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
55 HTTPRedirectHandler, FTPHandler, FileHandler}
56
Fred Drake93c86712001-03-02 20:39:34 +000057If the Python installation has SSL support (\function{socket.ssl()}
58exists), \class{HTTPSHandler} will also be added.
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +000059
60Beginning in Python 2.3, a \class{BaseHandler} subclass may also change its
61\var{handler_order} member variable to modify its position in the handlers
62list. Besides \class{ProxyHandler}, which has \var{handler_order} of
63\code{100}, all handlers currently have it set to \code{500}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000064\end{funcdesc}
65
Fred Drake93c86712001-03-02 20:39:34 +000066
67The following exceptions are raised as appropriate:
68
Moshe Zadka8a18e992001-03-01 08:40:42 +000069\begin{excdesc}{URLError}
Fred Drake399bc8c2001-11-09 03:49:29 +000070The handlers raise this exception (or derived exceptions) when they
71run into a problem. It is a subclass of \exception{IOError}.
Moshe Zadka8a18e992001-03-01 08:40:42 +000072\end{excdesc}
73
74\begin{excdesc}{HTTPError}
75A subclass of \exception{URLError}, it can also function as a
Fred Drake93c86712001-03-02 20:39:34 +000076non-exceptional file-like return value (the same thing that
77\function{urlopen()} returns). This is useful when handling exotic
78HTTP errors, such as requests for authentication.
Moshe Zadka8a18e992001-03-01 08:40:42 +000079\end{excdesc}
80
81\begin{excdesc}{GopherError}
82A subclass of \exception{URLError}, this is the error raised by the
83Gopher handler.
84\end{excdesc}
85
Fred Drake93c86712001-03-02 20:39:34 +000086
87The following classes are provided:
88
89\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
Moshe Zadka8a18e992001-03-01 08:40:42 +000090This class is an abstraction of a URL request.
91
Fred Drake399bc8c2001-11-09 03:49:29 +000092\var{url} should be a string which is a valid URL. For a description
Fred Drake93c86712001-03-02 20:39:34 +000093of \var{data} see the \method{add_data()} description.
Moshe Zadka8a18e992001-03-01 08:40:42 +000094\var{headers} should be a dictionary, and will be treated as if
Fred Drake93c86712001-03-02 20:39:34 +000095\method{add_header()} was called with each key and value as arguments.
Moshe Zadka8a18e992001-03-01 08:40:42 +000096\end{classdesc}
97
Fred Drake93c86712001-03-02 20:39:34 +000098\begin{classdesc}{OpenerDirector}{}
99The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s
100chained together. It manages the chaining of handlers, and recovery
101from errors.
102\end{classdesc}
103
104\begin{classdesc}{BaseHandler}{}
105This is the base class for all registered handlers --- and handles only
106the simple mechanics of registration.
107\end{classdesc}
108
109\begin{classdesc}{HTTPDefaultErrorHandler}{}
110A class which defines a default handler for HTTP error responses; all
111responses are turned into \exception{HTTPError} exceptions.
112\end{classdesc}
113
114\begin{classdesc}{HTTPRedirectHandler}{}
115A class to handle redirections.
116\end{classdesc}
117
118\begin{classdesc}{ProxyHandler}{\optional{proxies}}
119Cause requests to go through a proxy.
120If \var{proxies} is given, it must be a dictionary mapping
121protocol names to URLs of proxies.
122The default is to read the list of proxies from the environment
Fred Drake47852462001-05-11 15:46:45 +0000123variables \var{protocol}_proxy.
Fred Drake93c86712001-03-02 20:39:34 +0000124\end{classdesc}
125
126\begin{classdesc}{HTTPPasswordMgr}{}
127Keep a database of
128\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})}
129mappings.
130\end{classdesc}
131
132\begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{}
133Keep a database of
134\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings.
135A realm of \code{None} is considered a catch-all realm, which is searched
136if no other realm fits.
137\end{classdesc}
138
139\begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}}
140This is a mixin class that helps with HTTP authentication, both
141to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000142\var{password_mgr}, if given, should be something that is compatible
143with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
144for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000145\end{classdesc}
146
147\begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}}
148Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000149\var{password_mgr}, if given, should be something that is compatible
150with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
151for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000152\end{classdesc}
153
154\begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}}
155Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000156\var{password_mgr}, if given, should be something that is compatible
157with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
158for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000159\end{classdesc}
160
161\begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000162This is a mixin class that helps with HTTP authentication, both
Fred Drake93c86712001-03-02 20:39:34 +0000163to the remote host and to a proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000164\var{password_mgr}, if given, should be something that is compatible
165with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
166for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000167\end{classdesc}
168
169\begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}}
170Handle authentication with the remote host.
Fred Drake399bc8c2001-11-09 03:49:29 +0000171\var{password_mgr}, if given, should be something that is compatible
172with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
173for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000174\end{classdesc}
175
176\begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}}
177Handle authentication with the proxy.
Fred Drake399bc8c2001-11-09 03:49:29 +0000178\var{password_mgr}, if given, should be something that is compatible
179with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
180for information on the interface that must be supported.
Fred Drake93c86712001-03-02 20:39:34 +0000181\end{classdesc}
182
183\begin{classdesc}{HTTPHandler}{}
184A class to handle opening of HTTP URLs.
185\end{classdesc}
186
187\begin{classdesc}{HTTPSHandler}{}
188A class to handle opening of HTTPS URLs.
189\end{classdesc}
190
191\begin{classdesc}{FileHandler}{}
192Open local files.
193\end{classdesc}
194
195\begin{classdesc}{FTPHandler}{}
196Open FTP URLs.
197\end{classdesc}
198
199\begin{classdesc}{CacheFTPHandler}{}
200Open FTP URLs, keeping a cache of open FTP connections to minimize
201delays.
202\end{classdesc}
203
204\begin{classdesc}{GopherHandler}{}
205Open gopher URLs.
206\end{classdesc}
207
208\begin{classdesc}{UnknownHandler}{}
209A catch-all class to handle unknown URLs.
210\end{classdesc}
211
212
213\subsection{Request Objects \label{request-objects}}
214
Moshe Zadka8a18e992001-03-01 08:40:42 +0000215The following methods describe all of \class{Request}'s public interface,
216and so all must be overridden in subclasses.
217
218\begin{methoddesc}[Request]{add_data}{data}
Fred Drake399bc8c2001-11-09 03:49:29 +0000219Set the \class{Request} data to \var{data}. This is ignored
Moshe Zadka8a18e992001-03-01 08:40:42 +0000220by all handlers except HTTP handlers --- and there it should be an
\mimetype{application/x-www-form-urlencoded} buffer, and will change the
Fred Drake399bc8c2001-11-09 03:49:29 +0000222request to be \code{POST} rather than \code{GET}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000223\end{methoddesc}
224
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000225\begin{methoddesc}[Request]{get_method}{}
226Return a string indicating the HTTP request method. This is only
227meaningful for HTTP requests, and currently always takes one of the
values (\code{"GET"}, \code{"POST"}).
229\end{methoddesc}
230
Fred Drake399bc8c2001-11-09 03:49:29 +0000231\begin{methoddesc}[Request]{has_data}{}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000232Return whether the instance has a non-\code{None} data.
233\end{methoddesc}
234
Fred Drake399bc8c2001-11-09 03:49:29 +0000235\begin{methoddesc}[Request]{get_data}{}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000236Return the instance's data.
237\end{methoddesc}
238
239\begin{methoddesc}[Request]{add_header}{key, val}
Fred Drake93c86712001-03-02 20:39:34 +0000240Add another header to the request. Headers are currently ignored by
241all handlers except HTTP handlers, where they are added to the list
Fred Drake399bc8c2001-11-09 03:49:29 +0000242of headers sent to the server. Note that there cannot be more than
Fred Drake93c86712001-03-02 20:39:34 +0000243one header with the same name, and later calls will overwrite
244previous calls in case the \var{key} collides. Currently, this is
245no loss of HTTP functionality, since all headers which have meaning
Fred Drake399bc8c2001-11-09 03:49:29 +0000246when used more than once have a (header-specific) way of gaining the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000247same functionality using only one header.
248\end{methoddesc}
249
250\begin{methoddesc}[Request]{get_full_url}{}
251Return the URL given in the constructor.
252\end{methoddesc}
253
254\begin{methoddesc}[Request]{get_type}{}
Fred Drake93c86712001-03-02 20:39:34 +0000255Return the type of the URL --- also known as the scheme.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000256\end{methoddesc}
257
258\begin{methoddesc}[Request]{get_host}{}
Fred Drake399bc8c2001-11-09 03:49:29 +0000259Return the host to which a connection will be made.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000260\end{methoddesc}
261
262\begin{methoddesc}[Request]{get_selector}{}
263Return the selector --- the part of the URL that is sent to
264the server.
265\end{methoddesc}
266
267\begin{methoddesc}[Request]{set_proxy}{host, type}
Fred Drake399bc8c2001-11-09 03:49:29 +0000268Prepare the request by connecting to a proxy server. The \var{host}
269and \var{type} will replace those of the instance, and the instance's
Fred Drake93c86712001-03-02 20:39:34 +0000270selector will be the original URL given in the constructor.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000271\end{methoddesc}
272
Fred Drake93c86712001-03-02 20:39:34 +0000273
274\subsection{OpenerDirector Objects \label{opener-director-objects}}
275
276\class{OpenerDirector} instances have the following methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000277
278\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
Fred Drake93c86712001-03-02 20:39:34 +0000279\var{handler} should be an instance of \class{BaseHandler}. The
280following methods are searched, and added to the possible chains.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000281
282\begin{itemize}
Fred Drake93c86712001-03-02 20:39:34 +0000283 \item \method{\var{protocol}_open()} ---
284 signal that the handler knows how to open \var{protocol} URLs.
285 \item \method{\var{protocol}_error_\var{type}()} ---
286 signal that the handler knows how to handle \var{type} errors from
287 \var{protocol}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000288\end{itemize}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000289\end{methoddesc}
290
291\begin{methoddesc}[OpenerDirector]{close}{}
292Explicitly break cycles, and delete all the handlers.
Because the \class{OpenerDirector} needs to know the registered handlers,
and a handler needs to know which \class{OpenerDirector} called
it, there is a reference cycle. Even though recent versions of Python
Moshe Zadka8a18e992001-03-01 08:40:42 +0000296have cycle-collection, it is sometimes preferable to explicitly break
297the cycles.
298\end{methoddesc}
299
300\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000301Open the given \var{url} (which can be a request object or a string),
Moshe Zadka8a18e992001-03-01 08:40:42 +0000302optionally passing the given \var{data}.
303Arguments, return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000304of \function{urlopen()} (which simply calls the \method{open()} method
Raymond Hettinger0dfd7a92003-05-10 07:40:56 +0000305on the default installed \class{OpenerDirector}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000306\end{methoddesc}
307
Fred Drake93c86712001-03-02 20:39:34 +0000308\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
309 arg\optional{, \moreargs}}}
Fred Drake399bc8c2001-11-09 03:49:29 +0000310Handle an error in a given protocol. This will call the registered
311error handlers for the given protocol with the given arguments (which
312are protocol specific). The HTTP protocol is a special case which
313uses the HTTP response code to determine the specific error handler;
314refer to the \method{http_error_*()} methods of the handler classes.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000315
316Return values and exceptions raised are the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000317of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000318\end{methoddesc}
319
Fred Drake93c86712001-03-02 20:39:34 +0000320
321\subsection{BaseHandler Objects \label{base-handler-objects}}
322
323\class{BaseHandler} objects provide a couple of methods that are
324directly useful, and others that are meant to be used by derived
325classes. These are intended for direct use:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000326
327\begin{methoddesc}[BaseHandler]{add_parent}{director}
328Add a director as parent.
329\end{methoddesc}
330
331\begin{methoddesc}[BaseHandler]{close}{}
332Remove any parents.
333\end{methoddesc}
334
Fred Drake399bc8c2001-11-09 03:49:29 +0000335The following members and methods should only be used by classes
Fred Drake93c86712001-03-02 20:39:34 +0000336derived from \class{BaseHandler}:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000337
338\begin{memberdesc}[BaseHandler]{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000339A valid \class{OpenerDirector}, which can be used to open using a
340different protocol, or handle errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000341\end{memberdesc}
342
343\begin{methoddesc}[BaseHandler]{default_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000344This method is \emph{not} defined in \class{BaseHandler}, but
345subclasses should define it if they want to catch all URLs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000346
Fred Drake399bc8c2001-11-09 03:49:29 +0000347This method, if implemented, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000348\class{OpenerDirector}. It should return a file-like object as
349described in the return value of the \method{open()} of
Fred Drake399bc8c2001-11-09 03:49:29 +0000350\class{OpenerDirector}, or \code{None}. It should raise
Fred Drake93c86712001-03-02 20:39:34 +0000351\exception{URLError}, unless a truly exceptional thing happens (for
352example, \exception{MemoryError} should not be mapped to
Fred Drake399bc8c2001-11-09 03:49:29 +0000353\exception{URLError}).
Moshe Zadka8a18e992001-03-01 08:40:42 +0000354
355This method will be called before any protocol-specific open method.
356\end{methoddesc}
357
Fred Drake47852462001-05-11 15:46:45 +0000358\begin{methoddescni}[BaseHandler]{\var{protocol}_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000359This method is \emph{not} defined in \class{BaseHandler}, but
360subclasses should define it if they want to handle URLs with the given
361protocol.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000362
Fred Drake399bc8c2001-11-09 03:49:29 +0000363This method, if defined, will be called by the parent
Fred Drake93c86712001-03-02 20:39:34 +0000364\class{OpenerDirector}. Return values should be the same as for
365\method{default_open()}.
Fred Drake47852462001-05-11 15:46:45 +0000366\end{methoddescni}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000367
368\begin{methoddesc}[BaseHandler]{unknown_open}{req}
This method is \emph{not} defined in \class{BaseHandler}, but
370subclasses should define it if they want to catch all URLs with no
Fred Drake399bc8c2001-11-09 03:49:29 +0000371specific registered handler to open it.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000372
Fred Drake399bc8c2001-11-09 03:49:29 +0000373This method, if implemented, will be called by the \member{parent}
Fred Drake93c86712001-03-02 20:39:34 +0000374\class{OpenerDirector}. Return values should be the same as for
375\method{default_open()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000376\end{methoddesc}
377
378\begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000379This method is \emph{not} defined in \class{BaseHandler}, but
380subclasses should override it if they intend to provide a catch-all
381for otherwise unhandled HTTP errors. It will be called automatically
382by the \class{OpenerDirector} getting the error, and should not
383normally be called in other circumstances.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000384
Fred Drake93c86712001-03-02 20:39:34 +0000385\var{req} will be a \class{Request} object, \var{fp} will be a
386file-like object with the HTTP error body, \var{code} will be the
387three-digit code of the error, \var{msg} will be the user-visible
388explanation of the code and \var{hdrs} will be a mapping object with
389the headers of the error.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000390
391Return values and exceptions raised should be the same as those
Fred Drake93c86712001-03-02 20:39:34 +0000392of \function{urlopen()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000393\end{methoddesc}
394
Fred Drake93c86712001-03-02 20:39:34 +0000395\begin{methoddesc}[BaseHandler]{http_error_\var{nnn}}{req, fp, code, msg, hdrs}
396\var{nnn} should be a three-digit HTTP error code. This method is
397also not defined in \class{BaseHandler}, but will be called, if it
398exists, on an instance of a subclass, when an HTTP error with code
399\var{nnn} occurs.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000400
Fred Drake93c86712001-03-02 20:39:34 +0000401Subclasses should override this method to handle specific HTTP
402errors.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000403
Fred Drake93c86712001-03-02 20:39:34 +0000404Arguments, return values and exceptions raised should be the same as
405for \method{http_error_default()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000406\end{methoddesc}
407
Fred Drake93c86712001-03-02 20:39:34 +0000408\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000409
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000410\note{Some HTTP redirections require action from this module's client
411 code. If this is the case, \exception{HTTPError} is raised. See
412 \rfc{2616} for details of the precise meanings of the various
413 redirection codes.}
414
415\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
416 fp, code, msg, hdrs}
417Return a \class{Request} or \code{None} in response to a redirect.
418This is called by the default implementations of the
419\code{http_error_30x()} methods when a redirection is received from
420the server. If a redirection should take place, return a new
421\class{Request} to allow \code{http_error_30x()} to perform the
422redirect. Otherwise, raise \exception{HTTPError} if no other
423\class{Handler} should try to handle this URL, or return \code{None}
424if you can't but another \class{Handler} might.
425
426\note{The default implementation of this method does not strictly
Martin v. Löwis162f0812003-07-12 07:33:32 +0000427 follow \rfc{2616}, which says that 301 and 302 responses to POST
428 requests must not be automatically redirected without confirmation by
429 the user. In reality, browsers do allow automatic redirection of
430 these responses, changing the POST to a GET, and the default
431 implementation reproduces this behaviour.}
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000432
433\end{methoddesc}
434
Moshe Zadka8a18e992001-03-01 08:40:42 +0000435
Fred Drake93c86712001-03-02 20:39:34 +0000436\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
437 fp, code, msg, hdrs}
438Redirect to the \code{Location:} URL. This method is called by
439the parent \class{OpenerDirector} when getting an HTTP
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000440`moved permanently' response.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000441\end{methoddesc}
442
Fred Drake93c86712001-03-02 20:39:34 +0000443\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
444 fp, code, msg, hdrs}
445The same as \method{http_error_301()}, but called for the
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000446`found' response.
Fred Drake93c86712001-03-02 20:39:34 +0000447\end{methoddesc}
448
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000449\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
450 fp, code, msg, hdrs}
451The same as \method{http_error_301()}, but called for the
Martin v. Löwis162f0812003-07-12 07:33:32 +0000452`see other' response.
Raymond Hettinger024aaa12003-04-24 15:32:12 +0000453\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000454
Martin v. Löwis162f0812003-07-12 07:33:32 +0000455\begin{methoddesc}[HTTPRedirectHandler]{http_error_307}{req,
456 fp, code, msg, hdrs}
457The same as \method{http_error_301()}, but called for the
`temporary redirect' response.
\end{methoddesc}
459
Fred Drake93c86712001-03-02 20:39:34 +0000460\subsection{ProxyHandler Objects \label{proxy-handler}}
461
Fred Drake47852462001-05-11 15:46:45 +0000462\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
Fred Drake93c86712001-03-02 20:39:34 +0000463The \class{ProxyHandler} will have a method
464\method{\var{protocol}_open()} for every \var{protocol} which has a
465proxy in the \var{proxies} dictionary given in the constructor. The
466method will modify requests to go through the proxy, by calling
467\code{request.set_proxy()}, and call the next handler in the chain to
468actually execute the protocol.
Fred Drake47852462001-05-11 15:46:45 +0000469\end{methoddescni}
Fred Drake93c86712001-03-02 20:39:34 +0000470
471
472\subsection{HTTPPasswordMgr Objects \label{http-password-mgr}}
473
474These methods are available on \class{HTTPPasswordMgr} and
475\class{HTTPPasswordMgrWithDefaultRealm} objects.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000476
477\begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd}
\var{uri} can be either a single URI, or a sequence of URIs. \var{realm},
479\var{user} and \var{passwd} must be strings. This causes
Fred Drake93c86712001-03-02 20:39:34 +0000480\code{(\var{user}, \var{passwd})} to be used as authentication tokens
Moshe Zadka8a18e992001-03-01 08:40:42 +0000481when authentication for \var{realm} and a super-URI of any of the
482given URIs is given.
483\end{methoddesc}
484
485\begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri}
Fred Drake93c86712001-03-02 20:39:34 +0000486Get user/password for given realm and URI, if any. This method will
487return \code{(None, None)} if there is no matching user/password.
488
489For \class{HTTPPasswordMgrWithDefaultRealm} objects, the realm
490\code{None} will be searched if the given \var{realm} has no matching
491user/password.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000492\end{methoddesc}
493
Moshe Zadka8a18e992001-03-01 08:40:42 +0000494
Fred Drake93c86712001-03-02 20:39:34 +0000495\subsection{AbstractBasicAuthHandler Objects
496 \label{abstract-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000497
498\begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request}
499 {authreq, host, req, headers}
Fred Drake399bc8c2001-11-09 03:49:29 +0000500Handle an authentication request by getting a user/password pair, and
501re-trying the request. \var{authreq} should be the name of the header
502where the information about the realm is included in the request,
503\var{host} is the host to authenticate to, \var{req} should be the
504(failed) \class{Request} object, and \var{headers} should be the error
505headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000506\end{methoddesc}
507
Fred Drake93c86712001-03-02 20:39:34 +0000508
509\subsection{HTTPBasicAuthHandler Objects
510 \label{http-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000511
512\begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code,
513 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000514Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000515\end{methoddesc}
516
Fred Drake93c86712001-03-02 20:39:34 +0000517
518\subsection{ProxyBasicAuthHandler Objects
519 \label{proxy-basic-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000520
521\begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code,
522 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000523Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000524\end{methoddesc}
525
Moshe Zadka8a18e992001-03-01 08:40:42 +0000526
Fred Drake93c86712001-03-02 20:39:34 +0000527\subsection{AbstractDigestAuthHandler Objects
528 \label{abstract-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000529
Fred Drake93c86712001-03-02 20:39:34 +0000530\begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000531 {authreq, host, req, headers}
532\var{authreq} should be the name of the header where the information about
Fred Drake399bc8c2001-11-09 03:49:29 +0000533the realm is included in the request, \var{host} should be the host to
534authenticate to, \var{req} should be the (failed) \class{Request}
535object, and \var{headers} should be the error headers.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000536\end{methoddesc}
537
Fred Drake93c86712001-03-02 20:39:34 +0000538
539\subsection{HTTPDigestAuthHandler Objects
540 \label{http-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000541
542\begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code,
543 msg, hdrs}
Fred Drake399bc8c2001-11-09 03:49:29 +0000544Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000545\end{methoddesc}
546
Fred Drake93c86712001-03-02 20:39:34 +0000547
548\subsection{ProxyDigestAuthHandler Objects
549 \label{proxy-digest-auth-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000550
551\begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code,
552 msg, hdrs}
Fred Drake93c86712001-03-02 20:39:34 +0000553Retry the request with authentication information, if available.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000554\end{methoddesc}
555
Fred Drake93c86712001-03-02 20:39:34 +0000556
557\subsection{HTTPHandler Objects \label{http-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000558
559\begin{methoddesc}[HTTPHandler]{http_open}{req}
Fred Drake399bc8c2001-11-09 03:49:29 +0000560Send an HTTP request, which can be either GET or POST, depending on
Fred Drake93c86712001-03-02 20:39:34 +0000561\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000562\end{methoddesc}
563
Fred Drake93c86712001-03-02 20:39:34 +0000564
565\subsection{HTTPSHandler Objects \label{https-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000566
567\begin{methoddesc}[HTTPSHandler]{https_open}{req}
Fred Drake93c86712001-03-02 20:39:34 +0000568Send an HTTPS request, which can be either GET or POST, depending on
569\code{\var{req}.has_data()}.
Moshe Zadka8a18e992001-03-01 08:40:42 +0000570\end{methoddesc}
571
Moshe Zadka8a18e992001-03-01 08:40:42 +0000572
Fred Drake93c86712001-03-02 20:39:34 +0000573\subsection{FileHandler Objects \label{file-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000574
575\begin{methoddesc}[FileHandler]{file_open}{req}
576Open the file locally, if there is no host name, or
Fred Drake93c86712001-03-02 20:39:34 +0000577the host name is \code{'localhost'}. Change the
Moshe Zadka8a18e992001-03-01 08:40:42 +0000578protocol to \code{ftp} otherwise, and retry opening
579it using \member{parent}.
580\end{methoddesc}
581
Fred Drake93c86712001-03-02 20:39:34 +0000582
583\subsection{FTPHandler Objects \label{ftp-handler-objects}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000584
585\begin{methoddesc}[FTPHandler]{ftp_open}{req}
586Open the FTP file indicated by \var{req}.
587The login is always done with empty username and password.
588\end{methoddesc}
589
Moshe Zadka8a18e992001-03-01 08:40:42 +0000590
Fred Drake93c86712001-03-02 20:39:34 +0000591\subsection{CacheFTPHandler Objects \label{cacheftp-handler-objects}}
592
593\class{CacheFTPHandler} objects are \class{FTPHandler} objects with
594the following additional methods:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000595
596\begin{methoddesc}[CacheFTPHandler]{setTimeout}{t}
597Set timeout of connections to \var{t} seconds.
598\end{methoddesc}
599
600\begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m}
601Set maximum number of cached connections to \var{m}.
602\end{methoddesc}
603
Fred Drake93c86712001-03-02 20:39:34 +0000604
605\subsection{GopherHandler Objects \label{gopher-handler}}
Moshe Zadka8a18e992001-03-01 08:40:42 +0000606
607\begin{methoddesc}[GopherHandler]{gopher_open}{req}
608Open the gopher resource indicated by \var{req}.
609\end{methoddesc}
Fred Drake93c86712001-03-02 20:39:34 +0000610
611
612\subsection{UnknownHandler Objects \label{unknown-handler-objects}}
613
Fred Drakea9399112001-07-05 21:14:03 +0000614\begin{methoddesc}[UnknownHandler]{unknown_open}{}
Fred Drake93c86712001-03-02 20:39:34 +0000615Raise a \exception{URLError} exception.
616\end{methoddesc}
Fred Drake53e5b712003-04-25 15:27:33 +0000617
618
619\subsection{Examples \label{urllib2-examples}}
620
621This example gets the python.org main page and displays the first 100
622bytes of it:
623
624\begin{verbatim}
625>>> import urllib2
626>>> f = urllib2.urlopen('http://www.python.org/')
627>>> print f.read(100)
628<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
629<?xml-stylesheet href="./css/ht2html
630\end{verbatim}
631
632Here we are sending a data-stream to the stdin of a CGI and reading
633the data it returns to us:
634
635\begin{verbatim}
636>>> import urllib2
637>>> req = urllib2.Request(url='https://localhost/cgi-bin/test.cgi',
638... data='This data is passed to stdin of the CGI')
639>>> f = urllib2.urlopen(req)
640>>> print f.read()
641Got Data: "This data is passed to stdin of the CGI"
642\end{verbatim}
643
644The code for the sample CGI used in the above example is:
645
646\begin{verbatim}
647#!/usr/bin/env python
648import sys
649data = sys.stdin.read()
print 'Content-type: text/plain\n\nGot Data: "%s"' % data
652\end{verbatim}