Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 1 | # HTTP client class |
| 2 | # |
| 3 | # See the following document for a tentative protocol description: |
| 4 | # Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN |
| 5 | # Internet Draft 5 Nov 1993 |
| 6 | # draft-ietf-iiir-http-00.txt Expires 5 May 1994 |
| 7 | # |
| 8 | # Example: |
| 9 | # |
| 10 | # >>> from httplib import HTTP |
| 11 | # >>> h = HTTP('www.cwi.nl') |
| 12 | # >>> h.putrequest('GET', '/index.html') |
| 13 | # >>> h.putheader('Accept', 'text/html') |
| 14 | # >>> h.putheader('Accept', 'text/plain') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 15 | # >>> h.endheaders() |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 16 | # >>> errcode, errmsg, headers = h.getreply() |
| 17 | # >>> if errcode == 200: |
| 18 | # ... f = h.getfile() |
| 19 | # ... print f.read() # Print the raw HTML |
| 20 | # ... |
| 21 | # <TITLE>Home Page of CWI, Amsterdam</TITLE> |
| 22 | # [...many more lines...] |
| 23 | # >>> |
| 24 | # |
| 25 | # Note that an HTTP object is used for a single request -- to issue a |
| 26 | # second request to the same server, you create a new HTTP object. |
| 27 | # (This is in accordance with the protocol, which uses a new TCP |
| 28 | # connection for each request.) |
| 29 | |
| 30 | |
| 31 | import os |
| 32 | import socket |
| 33 | import string |
| 34 | import regex |
| 35 | import regsub |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 36 | import mimetools |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 37 | |
| 38 | HTTP_VERSION = 'HTTP/1.0' |
| 39 | HTTP_PORT = 80 |
| 40 | |
| 41 | replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \ |
| 42 | '[ \t]+\([0-9][0-9][0-9]\)\(.*\)' |
| 43 | replyprog = regex.compile(replypat) |
| 44 | |
| 45 | class HTTP: |
| 46 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 47 | def __init__(self, host = '', port = 0): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 48 | self.debuglevel = 0 |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 49 | self.file = None |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 50 | if host: self.connect(host, port) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 51 | |
| 52 | def set_debuglevel(self, debuglevel): |
| 53 | self.debuglevel = debuglevel |
| 54 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 55 | def connect(self, host, port = 0): |
| 56 | if not port: |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 57 | i = string.find(host, ':') |
| 58 | if i >= 0: |
| 59 | host, port = host[:i], host[i+1:] |
| 60 | try: port = string.atoi(port) |
Guido van Rossum | 76ca3c1 | 1994-02-22 16:06:02 +0000 | [diff] [blame] | 61 | except string.atoi_error: pass |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 62 | if not port: port = HTTP_PORT |
| 63 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| 64 | if self.debuglevel > 0: print 'connect:', (host, port) |
| 65 | self.sock.connect(host, port) |
| 66 | |
| 67 | def send(self, str): |
| 68 | if self.debuglevel > 0: print 'send:', `str` |
| 69 | self.sock.send(str) |
| 70 | |
| 71 | def putrequest(self, request, selector): |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 72 | if not selector: selector = '/' |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 73 | str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION) |
| 74 | self.send(str) |
| 75 | |
| 76 | def putheader(self, header, *args): |
| 77 | str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t')) |
| 78 | self.send(str) |
| 79 | |
| 80 | def endheaders(self): |
| 81 | self.send('\r\n') |
| 82 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 83 | def getreply(self): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 84 | self.file = self.sock.makefile('r') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame] | 85 | self.sock = None |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 86 | line = self.file.readline() |
| 87 | if self.debuglevel > 0: print 'reply:', `line` |
| 88 | if replyprog.match(line) < 0: |
| 89 | self.headers = None |
| 90 | return -1, line, self.headers |
| 91 | errcode, errmsg = replyprog.group(1, 2) |
| 92 | errcode = string.atoi(errcode) |
| 93 | errmsg = string.strip(errmsg) |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 94 | self.headers = mimetools.Message(self.file, 0) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 95 | return errcode, errmsg, self.headers |
| 96 | |
| 97 | def getfile(self): |
| 98 | return self.file |
| 99 | |
Guido van Rossum | 65ab98c | 1995-08-07 20:13:02 +0000 | [diff] [blame] | 100 | def close(self): |
| 101 | if self.file: |
| 102 | self.file.close() |
| 103 | self.file = None |
| 104 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 105 | |
| 106 | def test(): |
| 107 | import sys |
| 108 | import getopt |
| 109 | opts, args = getopt.getopt(sys.argv[1:], 'd') |
| 110 | dl = 0 |
| 111 | for o, a in opts: |
| 112 | if o == '-d': dl = dl + 1 |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 113 | host = 'www.python.org' |
| 114 | selector = '/' |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 115 | if args[0:]: host = args[0] |
| 116 | if args[1:]: selector = args[1] |
| 117 | h = HTTP() |
| 118 | h.set_debuglevel(dl) |
| 119 | h.connect(host) |
| 120 | h.putrequest('GET', selector) |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 121 | h.endheaders() |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 122 | errcode, errmsg, headers = h.getreply() |
| 123 | print 'errcode =', errcode |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 124 | print 'errmsg =', errmsg |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 125 | print |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 126 | if headers: |
| 127 | for header in headers.headers: print string.strip(header) |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 128 | print |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 129 | print h.getfile().read() |
| 130 | |
Guido van Rossum | a0dfc7a | 1995-09-07 19:28:19 +0000 | [diff] [blame] | 131 | |
# Run the self-test when executed as a script rather than imported.
if __name__ == '__main__': test()