| Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 1 | """HTTP client class | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 2 |  | 
| Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 3 | See the following URL for a description of the HTTP/1.0 protocol: | 
 | 4 | http://www.w3.org/hypertext/WWW/Protocols/ | 
 | 5 | (I actually implemented it from a much earlier draft.) | 
 | 6 |  | 
 | 7 | Example: | 
 | 8 |  | 
 | 9 | >>> from httplib import HTTP | 
 | 10 | >>> h = HTTP('www.python.org') | 
 | 11 | >>> h.putrequest('GET', '/index.html') | 
| Guido van Rossum | 974f70d | 2000-05-19 23:06:45 +0000 | [diff] [blame] | 12 | >>> h.putheader('Host', 'www.python.org') | 
| Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 13 | >>> h.putheader('Accept', 'text/html') | 
 | 14 | >>> h.putheader('Accept', 'text/plain') | 
 | 15 | >>> h.endheaders() | 
 | 16 | >>> errcode, errmsg, headers = h.getreply() | 
 | 17 | >>> if errcode == 200: | 
 | 18 | ...     f = h.getfile() | 
 | 19 | ...     print f.read() # Print the raw HTML | 
 | 20 | ... | 
 | 21 | <HEAD> | 
 | 22 | <TITLE>Python Language Home Page</TITLE> | 
 | 23 | [...many more lines...] | 
 | 24 | >>> | 
 | 25 |  | 
 | 26 | Note that an HTTP object is used for a single request -- to issue a | 
 | 27 | second request to the same server, you create a new HTTP object. | 
 | 28 | (This is in accordance with the protocol, which uses a new TCP | 
 | 29 | connection for each request.) | 
 | 30 | """ | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 31 |  | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 32 | import os | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 33 | import socket | 
 | 34 | import string | 
| Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 35 | import mimetools | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 36 |  | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 37 | try: | 
 | 38 |     from cStringIO import StringIO | 
 | 39 | except: | 
 | 40 |     from StringIO import StringIO | 
 | 41 |  | 
# Protocol version string appended to the request line by putrequest().
HTTP_VERSION = 'HTTP/1.0'
# Port used by HTTP.connect() when the caller does not supply one.
HTTP_PORT = 80
# Port used by HTTPS.connect() when the caller does not supply one.
HTTPS_PORT = 443
 | 45 |  | 
 | 46 | class FakeSocket: | 
 | 47 |     def __init__(self, sock, ssl): | 
| Fred Drake | 13a2c27 | 2000-02-10 17:17:14 +0000 | [diff] [blame] | 48 |         self.__sock = sock | 
 | 49 |         self.__ssl = ssl | 
 | 50 |         return | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 51 |  | 
| Fred Drake | 13a2c27 | 2000-02-10 17:17:14 +0000 | [diff] [blame] | 52 |     def makefile(self, mode):           # hopefully, never have to write | 
 | 53 |         msgbuf = "" | 
 | 54 |         while 1: | 
 | 55 |             try: | 
 | 56 |                 msgbuf = msgbuf + self.__ssl.read() | 
 | 57 |             except socket.sslerror, msg: | 
 | 58 |                 break | 
 | 59 |         return StringIO(msgbuf) | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 60 |  | 
 | 61 |     def send(self, stuff, flags = 0): | 
| Fred Drake | 13a2c27 | 2000-02-10 17:17:14 +0000 | [diff] [blame] | 62 |         return self.__ssl.write(stuff) | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 63 |  | 
 | 64 |     def recv(self, len = 1024, flags = 0): | 
| Fred Drake | 13a2c27 | 2000-02-10 17:17:14 +0000 | [diff] [blame] | 65 |         return self.__ssl.read(len) | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 66 |  | 
 | 67 |     def __getattr__(self, attr): | 
| Fred Drake | 13a2c27 | 2000-02-10 17:17:14 +0000 | [diff] [blame] | 68 |         return getattr(self.__sock, attr) | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 69 |  | 
class HTTP:
    """This class manages a connection to an HTTP server."""

    def __init__(self, host = '', port = 0, **x509):
        """Initialize a new instance.

        If specified, `host' is the name of the remote host to which
        to connect.  If specified, `port' specifies the port to which
        to connect.  By default, httplib.HTTP_PORT is used.

        The optional keyword arguments `key_file' and `cert_file' are
        stored on the instance as given (None when omitted).

        """
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.debuglevel = 0
        self.file = None
        # Defined up front so close() is safe on an unconnected instance.
        self.sock = None
        if host: self.connect(host, port)

    def set_debuglevel(self, debuglevel):
        """Set the debug output level.

        A non-false value results in debug messages for connection and
        for all messages sent to and received from the server.

        """
        self.debuglevel = debuglevel

    def connect(self, host, port = 0):
        """Connect to a host on a given port.

        If `port' is zero or omitted, a port may be given as part of
        `host' in the form 'host:port'; failing that, HTTP_PORT is
        used.  Raises socket.error if the embedded port is not numeric.

        Note:  This method is automatically invoked by __init__,
        if a host is specified during instantiation.

        """
        if not port:
            i = host.find(':')
            if i >= 0:
                host, port = host[:i], host[i+1:]
                try: port = int(port)
                except ValueError:
                    raise socket.error("nonnumeric port")
        if not port: port = HTTP_PORT
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0: print('connect: %s' % ((host, port),))
        self.sock.connect((host, port))

    def send(self, str):
        """Send `str' to the server.

        A socket's send() may accept only part of the data, so it is
        called repeatedly until everything has been handed over.
        Raises socket.error if send() reports that nothing was written.

        """
        if self.debuglevel > 0: print('send: %s' % repr(str))
        data = str
        while data:
            sent = self.sock.send(data)
            if sent is None:
                # No byte count reported; assume the whole buffer went out.
                break
            if sent <= 0:
                raise socket.error("connection closed while sending")
            data = data[sent:]

    def putrequest(self, request, selector):
        """Send a request to the server.

        `request' specifies an HTTP request method, e.g. 'GET'.
        `selector' specifies the object being requested, e.g.
        '/index.html'.  An empty selector is sent as '/'.

        """
        if not selector: selector = '/'
        line = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
        self.send(line)

    def putheader(self, header, *args):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Additional arguments are sent as continuation lines of the
        same header.

        """
        line = '%s: %s\r\n' % (header, '\r\n\t'.join(args))
        self.send(line)

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""
        self.send('\r\n')

    def getreply(self):
        """Get a reply from the server.

        Returns a tuple consisting of:
        - server response code (an integer, e.g. 200 if all goes well)
        - server response string corresponding to response code
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed, the tuple is
        (-1, <the raw status line>, None) instead.

        """
        self.file = self.sock.makefile('rb')
        line = self.file.readline()
        if self.debuglevel > 0: print('reply: %s' % repr(line))
        # Expect "<version> <code> [<message>]"; the message is optional.
        fields = line.split(None, 2)
        if len(fields) == 3:
            ver, code, msg = fields
        elif len(fields) == 2:
            ver, code = fields
            msg = ""
        else:
            self.headers = None
            return -1, line, self.headers
        if ver[:5] != 'HTTP/':
            self.headers = None
            return -1, line, self.headers
        try:
            errcode = int(code)
        except ValueError:
            # A non-numeric status code is a malformed reply as well.
            self.headers = None
            return -1, line, self.headers
        errmsg = msg.strip()
        self.headers = mimetools.Message(self.file, 0)
        return errcode, errmsg, self.headers

    def getfile(self):
        """Get a file object from which to receive data from the HTTP server.

        NOTE:  This method must not be invoked until getreply()
        has been invoked; before that it returns None.

        """
        return self.file

    def close(self):
        """Close the connection to the HTTP server."""
        if self.file is not None:
            self.file.close()
        self.file = None
        if self.sock is not None:
            self.sock.close()
        self.sock = None
| Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 191 |  | 
if hasattr(socket, "ssl"):
    class HTTPS(HTTP):
        """This class allows communication via SSL."""

        def connect(self, host, port = 0):
            """Connect to a host on a given port.

            If `port' is zero or omitted, a port may be given as part
            of `host' in the form 'host:port'; failing that, HTTPS_PORT
            is used.  Raises socket.error if the embedded port is not
            numeric.  After the TCP connection is made, an SSL object
            is created on it using self.key_file and self.cert_file.

            Note:  This method is automatically invoked by __init__,
            if a host is specified during instantiation.

            """
            if not port:
                i = host.find(':')
                if i >= 0:
                    host, port = host[:i], host[i+1:]
                    try: port = int(port)
                    except ValueError:
                        raise socket.error("nonnumeric port")
            if not port: port = HTTPS_PORT
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            if self.debuglevel > 0: print('connect: %s' % ((host, port),))
            try:
                sock.connect((host, port))
                ssl = socket.ssl(sock, self.key_file, self.cert_file)
            except:
                # self.sock is not assigned yet, so close() could never
                # release this socket; close it here and re-raise.
                sock.close()
                raise
            self.sock = FakeSocket(sock, ssl)
 | 216 |  | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 217 |  | 
 | 218 | def test(): | 
| Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 219 |     """Test this module. | 
 | 220 |  | 
 | 221 |     The test consists of retrieving and displaying the Python | 
 | 222 |     home page, along with the error code and error string returned | 
 | 223 |     by the www.python.org server. | 
 | 224 |  | 
 | 225 |     """ | 
 | 226 |     import sys | 
 | 227 |     import getopt | 
 | 228 |     opts, args = getopt.getopt(sys.argv[1:], 'd') | 
 | 229 |     dl = 0 | 
 | 230 |     for o, a in opts: | 
 | 231 |         if o == '-d': dl = dl + 1 | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 232 |     print "testing HTTP..." | 
| Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 233 |     host = 'www.python.org' | 
 | 234 |     selector = '/' | 
 | 235 |     if args[0:]: host = args[0] | 
 | 236 |     if args[1:]: selector = args[1] | 
 | 237 |     h = HTTP() | 
 | 238 |     h.set_debuglevel(dl) | 
 | 239 |     h.connect(host) | 
 | 240 |     h.putrequest('GET', selector) | 
 | 241 |     h.endheaders() | 
 | 242 |     errcode, errmsg, headers = h.getreply() | 
 | 243 |     print 'errcode =', errcode | 
 | 244 |     print 'errmsg  =', errmsg | 
 | 245 |     print | 
 | 246 |     if headers: | 
 | 247 |         for header in headers.headers: print string.strip(header) | 
 | 248 |     print | 
 | 249 |     print h.getfile().read() | 
| Guido van Rossum | 09c8b6c | 1999-12-07 21:37:17 +0000 | [diff] [blame] | 250 |     if hasattr(socket, "ssl"): | 
 | 251 |         print "-"*40 | 
 | 252 |         print "testing HTTPS..." | 
 | 253 |         host = 'synergy.as.cmu.edu' | 
 | 254 |         selector = '/~geek/' | 
 | 255 |         if args[0:]: host = args[0] | 
 | 256 |         if args[1:]: selector = args[1] | 
 | 257 |         h = HTTPS() | 
 | 258 |         h.set_debuglevel(dl) | 
 | 259 |         h.connect(host) | 
 | 260 |         h.putrequest('GET', selector) | 
 | 261 |         h.endheaders() | 
 | 262 |         errcode, errmsg, headers = h.getreply() | 
 | 263 |         print 'errcode =', errcode | 
 | 264 |         print 'errmsg  =', errmsg | 
 | 265 |         print | 
 | 266 |         if headers: | 
 | 267 |             for header in headers.headers: print string.strip(header) | 
 | 268 |         print | 
 | 269 |         print h.getfile().read() | 
| Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 270 |  | 
| Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 271 |  | 
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()