Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 1 | # HTTP client class |
| 2 | # |
Guido van Rossum | 928fced | 1995-09-30 16:50:46 +0000 | [diff] [blame] | 3 | # See the following URL for a description of the HTTP/1.0 protocol: |
| 4 | # http://www.w3.org/hypertext/WWW/Protocols/ |
| 5 | # (I actually implemented it from a much earlier draft.) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 6 | # |
| 7 | # Example: |
| 8 | # |
| 9 | # >>> from httplib import HTTP |
Guido van Rossum | 928fced | 1995-09-30 16:50:46 +0000 | [diff] [blame] | 10 | # >>> h = HTTP('www.python.org') |
| 11 | # >>> h.putrequest('GET', '/index.html') |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 12 | # >>> h.putheader('Accept', 'text/html') |
| 13 | # >>> h.putheader('Accept', 'text/plain') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 14 | # >>> h.endheaders() |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 15 | # >>> errcode, errmsg, headers = h.getreply() |
| 16 | # >>> if errcode == 200: |
| 17 | # ... f = h.getfile() |
| 18 | # ... print f.read() # Print the raw HTML |
| 19 | # ... |
Guido van Rossum | 928fced | 1995-09-30 16:50:46 +0000 | [diff] [blame] | 20 | # <HEAD> |
| 21 | # <TITLE>Python Language Home Page</TITLE> |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 22 | # [...many more lines...] |
| 23 | # >>> |
| 24 | # |
| 25 | # Note that an HTTP object is used for a single request -- to issue a |
| 26 | # second request to the same server, you create a new HTTP object. |
| 27 | # (This is in accordance with the protocol, which uses a new TCP |
| 28 | # connection for each request.) |
| 29 | |
| 30 | |
| 31 | import os |
| 32 | import socket |
| 33 | import string |
| 34 | import regex |
| 35 | import regsub |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 36 | import mimetools |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 37 | |
# Protocol version string sent on the request line.
HTTP_VERSION = 'HTTP/1.0'
# TCP port used when the caller does not supply one.
HTTP_PORT = 80

# Pattern for the first line of a reply:
#     HTTP/<major>.<minor> <3-digit status code> <reason text>
# Group 1 is the status code, group 2 the rest of the line.
#
# The version in a reply is not required to equal HTTP_VERSION (a
# server may answer 'HTTP/1.1 200 OK' to an HTTP/1.0 request), so any
# <digits>.<digits> version is accepted instead of the literal
# HTTP_VERSION string.
replypat = 'HTTP/[0-9]+\\.[0-9]+' + \
           '[ \t]+\\([0-9][0-9][0-9]\\)\\(.*\\)'
replyprog = regex.compile(replypat)
| 44 | |
| 45 | class HTTP: |
| 46 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 47 | def __init__(self, host = '', port = 0): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 48 | self.debuglevel = 0 |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 49 | self.file = None |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 50 | if host: self.connect(host, port) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 51 | |
| 52 | def set_debuglevel(self, debuglevel): |
| 53 | self.debuglevel = debuglevel |
| 54 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 55 | def connect(self, host, port = 0): |
| 56 | if not port: |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 57 | i = string.find(host, ':') |
| 58 | if i >= 0: |
| 59 | host, port = host[:i], host[i+1:] |
| 60 | try: port = string.atoi(port) |
Guido van Rossum | 928fced | 1995-09-30 16:50:46 +0000 | [diff] [blame] | 61 | except string.atoi_error: |
| 62 | raise socket.error, "nonnumeric port" |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 63 | if not port: port = HTTP_PORT |
| 64 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| 65 | if self.debuglevel > 0: print 'connect:', (host, port) |
| 66 | self.sock.connect(host, port) |
| 67 | |
| 68 | def send(self, str): |
| 69 | if self.debuglevel > 0: print 'send:', `str` |
| 70 | self.sock.send(str) |
| 71 | |
| 72 | def putrequest(self, request, selector): |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 73 | if not selector: selector = '/' |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 74 | str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION) |
| 75 | self.send(str) |
| 76 | |
| 77 | def putheader(self, header, *args): |
| 78 | str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t')) |
| 79 | self.send(str) |
| 80 | |
| 81 | def endheaders(self): |
| 82 | self.send('\r\n') |
| 83 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 84 | def getreply(self): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 85 | self.file = self.sock.makefile('r') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 86 | self.sock = None |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 87 | line = self.file.readline() |
| 88 | if self.debuglevel > 0: print 'reply:', `line` |
| 89 | if replyprog.match(line) < 0: |
| 90 | self.headers = None |
| 91 | return -1, line, self.headers |
| 92 | errcode, errmsg = replyprog.group(1, 2) |
| 93 | errcode = string.atoi(errcode) |
| 94 | errmsg = string.strip(errmsg) |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 95 | self.headers = mimetools.Message(self.file, 0) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 96 | return errcode, errmsg, self.headers |
| 97 | |
| 98 | def getfile(self): |
| 99 | return self.file |
| 100 | |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 101 | def close(self): |
| 102 | if self.file: |
| 103 | self.file.close() |
| 104 | self.file = None |
| 105 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 106 | |
| 107 | def test(): |
| 108 | import sys |
| 109 | import getopt |
| 110 | opts, args = getopt.getopt(sys.argv[1:], 'd') |
| 111 | dl = 0 |
| 112 | for o, a in opts: |
| 113 | if o == '-d': dl = dl + 1 |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 114 | host = 'www.python.org' |
| 115 | selector = '/' |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 116 | if args[0:]: host = args[0] |
| 117 | if args[1:]: selector = args[1] |
| 118 | h = HTTP() |
| 119 | h.set_debuglevel(dl) |
| 120 | h.connect(host) |
| 121 | h.putrequest('GET', selector) |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 122 | h.endheaders() |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 123 | errcode, errmsg, headers = h.getreply() |
| 124 | print 'errcode =', errcode |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 125 | print 'errmsg =', errmsg |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 126 | print |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 127 | if headers: |
| 128 | for header in headers.headers: print string.strip(header) |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 129 | print |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 130 | print h.getfile().read() |
| 131 | |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 132 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 133 | if __name__ == '__main__': |
| 134 | test() |