blob: d494e21ef04c3bc8e04ae6cad550f1ba5a66b099 [file] [log] [blame]
Guido van Rossum23acc951994-02-21 16:36:04 +00001# HTTP client class
2#
3# See the following document for a tentative protocol description:
4# Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN
5# Internet Draft 5 Nov 1993
6# draft-ietf-iiir-http-00.txt Expires 5 May 1994
7#
8# Example:
9#
10# >>> from httplib import HTTP
11# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
13# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
15# >>> errcode, errmsg, headers = h.getreply()
16# >>> if errcode == 200:
# ...     f = h.getfile()
# ...     print f.read() # Print the raw HTML
19# ...
20# <TITLE>Home Page of CWI, Amsterdam</TITLE>
21# [...many more lines...]
22# >>>
23#
24# Note that an HTTP object is used for a single request -- to issue a
25# second request to the same server, you create a new HTTP object.
26# (This is in accordance with the protocol, which uses a new TCP
27# connection for each request.)
28
29
30import os
31import socket
32import string
33import regex
34import regsub
35import rfc822
36
37HTTP_VERSION = 'HTTP/1.0'
38HTTP_PORT = 80
39
40replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
41 '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
42replyprog = regex.compile(replypat)
43
44class HTTP:
45
46 def __init__(self, *args):
47 self.debuglevel = 0
48 if args: apply(self.connect, args)
49
50 def set_debuglevel(self, debuglevel):
51 self.debuglevel = debuglevel
52
53 def connect(self, host, *args):
54 if args:
55 if args[1:]: raise TypeError, 'too many args'
56 port = args[0]
57 else:
58 i = string.find(host, ':')
Guido van Rossum76ca3c11994-02-22 16:06:02 +000059 port = None
Guido van Rossum23acc951994-02-21 16:36:04 +000060 if i >= 0:
61 host, port = host[:i], host[i+1:]
62 try: port = string.atoi(port)
Guido van Rossum76ca3c11994-02-22 16:06:02 +000063 except string.atoi_error: pass
Guido van Rossum23acc951994-02-21 16:36:04 +000064 if not port: port = HTTP_PORT
65 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
66 if self.debuglevel > 0: print 'connect:', (host, port)
67 self.sock.connect(host, port)
68
69 def send(self, str):
70 if self.debuglevel > 0: print 'send:', `str`
71 self.sock.send(str)
72
73 def putrequest(self, request, selector):
74 str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
75 self.send(str)
76
77 def putheader(self, header, *args):
78 str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t'))
79 self.send(str)
80
81 def endheaders(self):
82 self.send('\r\n')
83
84 def endrequest(self):
85 if self.debuglevel > 0: print 'shutdown: 1'
86 self.sock.shutdown(1)
87
88 def getreply(self):
89 self.endrequest()
90 self.file = self.sock.makefile('r')
91 line = self.file.readline()
92 if self.debuglevel > 0: print 'reply:', `line`
93 if replyprog.match(line) < 0:
94 self.headers = None
95 return -1, line, self.headers
96 errcode, errmsg = replyprog.group(1, 2)
97 errcode = string.atoi(errcode)
98 errmsg = string.strip(errmsg)
99 self.headers = rfc822.Message(self.file)
100 return errcode, errmsg, self.headers
101
102 def getfile(self):
103 return self.file
104
105
106def test():
107 import sys
108 import getopt
109 opts, args = getopt.getopt(sys.argv[1:], 'd')
110 dl = 0
111 for o, a in opts:
112 if o == '-d': dl = dl + 1
113 host = 'www.cwi.nl:80'
114 selector = '/index.html'
115 if args[0:]: host = args[0]
116 if args[1:]: selector = args[1]
117 h = HTTP()
118 h.set_debuglevel(dl)
119 h.connect(host)
120 h.putrequest('GET', selector)
121 errcode, errmsg, headers = h.getreply()
122 print 'errcode =', errcode
123 print 'headers =', headers
124 print 'errmsg =', errmsg
125 if headers:
126 for header in headers.headers: print string.strip(header)
127 print h.getfile().read()
128
# Run the self test when this file is executed as a script.
if __name__ == '__main__': test()