Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 1 | # HTTP client class |
| 2 | # |
| 3 | # See the following document for a tentative protocol description: |
| 4 | # Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN |
| 5 | # Internet Draft 5 Nov 1993 |
| 6 | # draft-ietf-iiir-http-00.txt Expires 5 May 1994 |
| 7 | # |
| 8 | # Example: |
| 9 | # |
| 10 | # >>> from httplib import HTTP |
| 11 | # >>> h = HTTP('www.cwi.nl') |
| 12 | # >>> h.putrequest('GET', '/index.html') |
| 13 | # >>> h.putheader('Accept', 'text/html') |
| 14 | # >>> h.putheader('Accept', 'text/plain') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame^] | 15 | # >>> h.endheaders() |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 16 | # >>> errcode, errmsg, headers = h.getreply() |
| 17 | # >>> if errcode == 200: |
| 18 | # ... f = h.getfile() |
| 19 | # ... print f.read() # Print the raw HTML |
| 20 | # ... |
| 21 | # <TITLE>Home Page of CWI, Amsterdam</TITLE> |
| 22 | # [...many more lines...] |
| 23 | # >>> |
| 24 | # |
| 25 | # Note that an HTTP object is used for a single request -- to issue a |
| 26 | # second request to the same server, you create a new HTTP object. |
| 27 | # (This is in accordance with the protocol, which uses a new TCP |
| 28 | # connection for each request.) |
| 29 | |
| 30 | |
| 31 | import os |
| 32 | import socket |
| 33 | import string |
| 34 | import regex |
| 35 | import regsub |
| 36 | import rfc822 |
| 37 | |
| 38 | HTTP_VERSION = 'HTTP/1.0' |
| 39 | HTTP_PORT = 80 |
| 40 | |
| 41 | replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \ |
| 42 | '[ \t]+\([0-9][0-9][0-9]\)\(.*\)' |
| 43 | replyprog = regex.compile(replypat) |
| 44 | |
| 45 | class HTTP: |
| 46 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 47 | def __init__(self, host = '', port = 0): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 48 | self.debuglevel = 0 |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 49 | if host: self.connect(host, port) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 50 | |
| 51 | def set_debuglevel(self, debuglevel): |
| 52 | self.debuglevel = debuglevel |
| 53 | |
Guido van Rossum | 2922c6d | 1994-05-06 14:28:19 +0000 | [diff] [blame] | 54 | def connect(self, host, port = 0): |
| 55 | if not port: |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 56 | i = string.find(host, ':') |
| 57 | if i >= 0: |
| 58 | host, port = host[:i], host[i+1:] |
| 59 | try: port = string.atoi(port) |
Guido van Rossum | 76ca3c1 | 1994-02-22 16:06:02 +0000 | [diff] [blame] | 60 | except string.atoi_error: pass |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 61 | if not port: port = HTTP_PORT |
| 62 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| 63 | if self.debuglevel > 0: print 'connect:', (host, port) |
| 64 | self.sock.connect(host, port) |
| 65 | |
| 66 | def send(self, str): |
| 67 | if self.debuglevel > 0: print 'send:', `str` |
| 68 | self.sock.send(str) |
| 69 | |
| 70 | def putrequest(self, request, selector): |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame^] | 71 | if not selector: selector = '/' |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 72 | str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION) |
| 73 | self.send(str) |
| 74 | |
| 75 | def putheader(self, header, *args): |
| 76 | str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t')) |
| 77 | self.send(str) |
| 78 | |
| 79 | def endheaders(self): |
| 80 | self.send('\r\n') |
| 81 | |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 82 | def getreply(self): |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 83 | self.file = self.sock.makefile('r') |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame^] | 84 | self.sock = None |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 85 | line = self.file.readline() |
| 86 | if self.debuglevel > 0: print 'reply:', `line` |
| 87 | if replyprog.match(line) < 0: |
| 88 | self.headers = None |
| 89 | return -1, line, self.headers |
| 90 | errcode, errmsg = replyprog.group(1, 2) |
| 91 | errcode = string.atoi(errcode) |
| 92 | errmsg = string.strip(errmsg) |
Guido van Rossum | 4cdcef7 | 1995-06-22 18:48:48 +0000 | [diff] [blame^] | 93 | self.headers = rfc822.Message(self.file, 0) |
Guido van Rossum | 23acc95 | 1994-02-21 16:36:04 +0000 | [diff] [blame] | 94 | return errcode, errmsg, self.headers |
| 95 | |
| 96 | def getfile(self): |
| 97 | return self.file |
| 98 | |
| 99 | |
| 100 | def test(): |
| 101 | import sys |
| 102 | import getopt |
| 103 | opts, args = getopt.getopt(sys.argv[1:], 'd') |
| 104 | dl = 0 |
| 105 | for o, a in opts: |
| 106 | if o == '-d': dl = dl + 1 |
| 107 | host = 'www.cwi.nl:80' |
| 108 | selector = '/index.html' |
| 109 | if args[0:]: host = args[0] |
| 110 | if args[1:]: selector = args[1] |
| 111 | h = HTTP() |
| 112 | h.set_debuglevel(dl) |
| 113 | h.connect(host) |
| 114 | h.putrequest('GET', selector) |
| 115 | errcode, errmsg, headers = h.getreply() |
| 116 | print 'errcode =', errcode |
| 117 | print 'headers =', headers |
| 118 | print 'errmsg =', errmsg |
| 119 | if headers: |
| 120 | for header in headers.headers: print string.strip(header) |
| 121 | print h.getfile().read() |
| 122 | |
# Run the self-test only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    test()