blob: 1bb6bf2100878fcf35a0936cb7179a24b0b4b694 [file] [log] [blame]
# HTTP client class
#
# See the following document for a tentative protocol description:
#       Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN
#       Internet Draft 5 Nov 1993
#       draft-ietf-iiir-http-00.txt Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ...     f = h.getfile()
# ...     print f.read() # Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)
29
30
31import os
32import socket
33import string
34import regex
35import regsub
36import rfc822
37
# Protocol version string: sent as the last field of every request line
# and expected at the start of every reply line.
HTTP_VERSION = 'HTTP/1.0'
# TCP port used by HTTP.connect() when the caller gives no usable port.
HTTP_PORT = 80

# Pattern for the first line of a server reply:
#   <HTTP_VERSION> <whitespace> <3-digit code> <rest of line>
# The gsub() call backslash-escapes every '.' in HTTP_VERSION so that it
# matches literally.  Group 1 is the three-digit status code and group 2
# is the remainder of the line (the reason text, not yet stripped).
replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
          '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
# Compiled once at import time; HTTP.getreply() matches against it.
replyprog = regex.compile(replypat)
44
class HTTP:
    """Client side of one HTTP request/reply exchange.

    Call order: connect() (or pass a host to the constructor),
    putrequest(), zero or more putheader() calls, endheaders(),
    getreply(), and finally getfile() to read the body.  getreply()
    drops the socket, so an instance cannot send a second request.
    """

    def __init__(self, host='', port=0):
        """Create the client and connect at once if *host* is non-empty."""
        self.debuglevel = 0
        if host:
            self.connect(host, port)

    def set_debuglevel(self, debuglevel):
        """Set the trace level; a value > 0 makes connect(), send() and
        getreply() print what they do."""
        self.debuglevel = debuglevel

    def connect(self, host, port=0):
        """Open a TCP connection to the server.

        If *port* is false, *host* may carry the port as 'host:port'.
        An empty or zero port falls back to HTTP_PORT.

        Raises socket.error if the text after the colon is non-empty
        and not a decimal number, before any socket is created.
        """
        if not port:
            colon = host.find(':')
            if colon >= 0:
                host, port = host[:colon], host[colon + 1:]
                if port:
                    try:
                        port = int(port)
                    except ValueError:
                        # Fail here with a clear message instead of
                        # handing a string port to the socket layer.
                        raise socket.error('nonnumeric port: ' + repr(port))
        if not port:
            port = HTTP_PORT
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Each print gets one parenthesised argument so the output is the
        # same whether print is a statement or a function.
        if self.debuglevel > 0:
            print('connect: %s' % ((host, port),))
        # The address is passed as a single (host, port) tuple.
        self.sock.connect((host, port))

    def send(self, str):
        """Write the string *str* to the server socket."""
        if self.debuglevel > 0:
            print('send: ' + repr(str))
        self.sock.send(str)

    def putrequest(self, request, selector):
        """Send the request line '<request> <selector> <HTTP_VERSION>'.

        An empty *selector* is sent as '/'.
        """
        if not selector:
            selector = '/'
        self.send('%s %s %s\r\n' % (request, selector, HTTP_VERSION))

    def putheader(self, header, *args):
        """Send one header line.

        Several values in *args* are sent as continuation lines of the
        same header (joined with CR LF TAB).
        """
        value = '\r\n\t'.join(args)
        self.send('%s: %s\r\n' % (header, value))

    def endheaders(self):
        """Send the blank line that terminates the request headers."""
        self.send('\r\n')

    def getreply(self):
        """Read the status line and headers of the server's reply.

        Returns (errcode, errmsg, headers): the integer status code, the
        stripped remainder of the status line, and an rfc822.Message
        holding the reply headers.  If the first line does not match
        replyprog, returns (-1, <that line>, None) instead.
        """
        self.file = self.sock.makefile('r')
        # Only the file object is kept from here on; send() can no
        # longer be used on this instance.
        self.sock = None
        line = self.file.readline()
        if self.debuglevel > 0:
            print('reply: ' + repr(line))
        if replyprog.match(line) < 0:
            self.headers = None
            return -1, line, self.headers
        errcode, errmsg = replyprog.group(1, 2)
        errcode = int(errcode)
        errmsg = errmsg.strip()
        # Second argument 0: presumably rfc822's "seekable" flag, since a
        # socket file cannot seek -- confirm against the rfc822 module.
        self.headers = rfc822.Message(self.file, 0)
        return errcode, errmsg, self.headers

    def getfile(self):
        """Return the file object created by getreply(); after the
        headers have been read it is positioned at the reply body."""
        return self.file
98
99
def test():
    """Command-line self-test: fetch one document and print the reply.

    Arguments (from sys.argv): any number of -d flags, each raising the
    debug level by one, then an optional host[:port] (default
    'www.cwi.nl:80') and an optional selector (default '/index.html').
    """
    import sys
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = 0
    for opt, _value in opts:
        if opt == '-d':
            dl = dl + 1
    host = 'www.cwi.nl:80'
    selector = '/index.html'
    if args[0:]:
        host = args[0]
    if args[1:]:
        selector = args[1]
    h = HTTP()
    h.set_debuglevel(dl)
    h.connect(host)
    h.putrequest('GET', selector)
    # The request is complete only after the blank line; without it the
    # server keeps waiting for more headers and getreply() blocks.
    h.endheaders()
    errcode, errmsg, headers = h.getreply()
    # Each print gets one parenthesised argument so the output is the
    # same whether print is a statement or a function.
    print('errcode = %s' % (errcode,))
    print('headers = %s' % (headers,))
    print('errmsg = %s' % (errmsg,))
    if headers:
        for header in headers.headers:
            print(header.strip())
    print(h.getfile().read())


if __name__ == '__main__':
    test()