Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 1 | """HTTP client class |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | 41999c1 | 1997-12-09 00:12:23 +0000 | [diff] [blame] | 3 | See the following URL for a description of the HTTP/1.0 protocol: |
| 4 | http://www.w3.org/hypertext/WWW/Protocols/ |
| 5 | (I actually implemented it from a much earlier draft.) |
| 6 | |
| 7 | Example: |
| 8 | |
| 9 | >>> from httplib import HTTP |
| 10 | >>> h = HTTP('www.python.org') |
| 11 | >>> h.putrequest('GET', '/index.html') |
| 12 | >>> h.putheader('Accept', 'text/html') |
| 13 | >>> h.putheader('Accept', 'text/plain') |
| 14 | >>> h.endheaders() |
| 15 | >>> errcode, errmsg, headers = h.getreply() |
| 16 | >>> if errcode == 200: |
| 17 | ... f = h.getfile() |
| 18 | ... print f.read() # Print the raw HTML |
| 19 | ... |
| 20 | <HEAD> |
| 21 | <TITLE>Python Language Home Page</TITLE> |
| 22 | [...many more lines...] |
| 23 | >>> |
| 24 | |
| 25 | Note that an HTTP object is used for a single request -- to issue a |
| 26 | second request to the same server, you create a new HTTP object. |
| 27 | (This is in accordance with the protocol, which uses a new TCP |
| 28 | connection for each request.) |
| 29 | """ |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 30 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 31 | import socket |
| 32 | import string |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 33 | import mimetools |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 34 | |
# Protocol version token appended to every request line by putrequest().
HTTP_VERSION = 'HTTP/1.0'
# Port used by connect() when the caller supplies none.
HTTP_PORT = 80
| 37 | |
class HTTP:
    """This class manages a connection to an HTTP server.

    The expected call sequence is putrequest(), any number of
    putheader() calls, endheaders(), getreply() and finally getfile()
    to read the response body.  Call close() when done.

    Instance attributes:
    - debuglevel: when > 0, connection and traffic are echoed to stdout
    - sock: the connected socket, or None when not connected
    - file: file object wrapping the socket once getreply() has run
    - headers: response headers parsed by getreply(), or None
    """

    def __init__(self, host = '', port = 0):
        """Initialize a new instance.

        If specified, `host' is the name of the remote host to which
        to connect.  If specified, `port' specifies the port to which
        to connect.  By default, httplib.HTTP_PORT is used.

        """
        self.debuglevel = 0
        self.file = None
        # Define these up front so close() is safe on an instance that
        # was never connected and `headers' always exists.
        self.sock = None
        self.headers = None
        if host: self.connect(host, port)

    def set_debuglevel(self, debuglevel):
        """Set the debug output level.

        A non-false value results in debug messages for connection and
        for all messages sent to and received from the server.

        """
        self.debuglevel = debuglevel

    def connect(self, host, port = 0):
        """Connect to a host on a given port.

        If no port is given, `host' may have the form 'name:port'; a
        non-numeric port in that form raises socket.error.  When no
        port can be determined, HTTP_PORT is used.

        Note:  This method is automatically invoked by __init__,
        if a host is specified during instantiation.

        """
        if not port:
            colon = host.find(':')
            if colon >= 0:
                host, port = host[:colon], host[colon+1:]
                try:
                    port = int(port)
                except ValueError:
                    raise socket.error("nonnumeric port")
        if not port: port = HTTP_PORT
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0: print('connect: ' + repr((host, port)))
        # An AF_INET address is a single (host, port) tuple.
        self.sock.connect((host, port))

    def send(self, str):
        """Send `str' to the server."""
        if self.debuglevel > 0: print('send: ' + repr(str))
        # sendall() keeps writing until everything is transmitted;
        # plain send() may stop after only part of the data.
        self.sock.sendall(str)

    def putrequest(self, request, selector):
        """Send a request to the server.

        `request' specifies an HTTP request method, e.g. 'GET'.
        `selector' specifies the object being requested, e.g.
        '/index.html'.  An empty selector is sent as '/'.

        """
        if not selector: selector = '/'
        line = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
        self.send(line)

    def putheader(self, header, *args):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Additional arguments are sent as continuation lines of the
        same header (each preceded by CRLF and a tab).

        """
        line = '%s: %s\r\n' % (header, '\r\n\t'.join(args))
        self.send(line)

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""
        self.send('\r\n')

    def getreply(self):
        """Get a reply from the server.

        Returns a tuple consisting of:
        - server response code as an integer (e.g. 200 if all goes well)
        - server response string corresponding to response code
          (empty if the server sent none)
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed (too few fields, a version
        not starting with 'HTTP/', or a non-numeric code), the tuple
        (-1, <the raw line>, None) is returned instead.

        """
        self.file = self.sock.makefile('rb')
        line = self.file.readline()
        if self.debuglevel > 0: print('reply: ' + repr(line))
        # Stays None unless the status line turns out to be valid.
        self.headers = None
        fields = line.split(None, 2)
        if len(fields) < 2:
            return -1, line, self.headers
        ver, code = fields[0], fields[1]
        # The reason phrase is optional: accept 'HTTP/1.0 200' as well.
        if len(fields) == 3:
            msg = fields[2]
        else:
            msg = ''
        if ver[:5] != 'HTTP/':
            return -1, line, self.headers
        try:
            errcode = int(code)
        except ValueError:
            # Report a non-numeric status like any other malformed line.
            return -1, line, self.headers
        errmsg = msg.strip()
        self.headers = mimetools.Message(self.file, 0)
        return errcode, errmsg, self.headers

    def getfile(self):
        """Get a file object from which to receive data from the HTTP server.

        NOTE:  This method must not be invoked until getreply()
        has been invoked; before that it returns None.

        """
        return self.file

    def close(self):
        """Close the connection to the HTTP server.

        Safe to call on an instance that was never connected, and safe
        to call more than once.

        """
        try:
            if self.file is not None:
                self.file.close()
        finally:
            # Release the socket even if closing the file object failed.
            self.file = None
            if self.sock is not None:
                try:
                    self.sock.close()
                finally:
                    self.sock = None
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 153 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 154 | |
def test():
    """Test this module.

    The test consists of retrieving and displaying the Python
    home page, along with the error code and error string returned
    by the www.python.org server.

    Command line (read from sys.argv): each `-d' option raises the
    debug level by one; an optional first argument replaces the
    default host and an optional second argument replaces the
    default selector '/'.

    """
    import sys
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = 0
    for o, a in opts:
        if o == '-d': dl = dl + 1
    host = 'www.python.org'
    selector = '/'
    if args[0:]: host = args[0]
    if args[1:]: selector = args[1]
    h = HTTP()
    h.set_debuglevel(dl)
    h.connect(host)
    # From here on a socket exists, so make sure it is released even
    # if the exchange fails part-way through.
    try:
        h.putrequest('GET', selector)
        h.endheaders()
        errcode, errmsg, headers = h.getreply()
        print('errcode = %s' % (errcode,))
        print('errmsg = %s' % (errmsg,))
        print('')
        if headers:
            for header in headers.headers: print(header.strip())
        print('')
        print(h.getfile().read())
    finally:
        h.close()
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 186 | |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 187 | |
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()