blob: 226eda6e9826303e0865ba9156e9b97559dc4726 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Gopher protocol client interface."""
Guido van Rossum23acc951994-02-21 16:36:04 +00002
Skip Montanaro2dd42762001-01-23 15:35:05 +00003__all__ = ["send_selector","send_query"]
4
import warnings
# stacklevel=2 attributes the warning to the module that imports gopherlib
# rather than to gopherlib itself, so the report points at the code that
# needs to be changed.
warnings.warn("the gopherlib module is deprecated", DeprecationWarning,
              stacklevel=2)
7
# Default selector, host and port
DEF_SELECTOR = '1/'
DEF_HOST = 'gopher.micro.umn.edu'
DEF_PORT = 70

# Recognized file types: each constant is the one-character type code that
# starts a directory line or follows the leading slash of a gopher URL path.
A_TEXT = '0'
A_MENU = '1'
A_CSO = '2'
A_ERROR = '3'
A_MACBINHEX = '4'
A_PCBINHEX = '5'
A_UUENCODED = '6'
A_INDEX = '7'
A_TELNET = '8'
A_BINARY = '9'
A_DUPLICATE = '+'
A_SOUND = 's'
A_EVENT = 'e'
A_CALENDAR = 'c'
A_HTML = 'h'  # HTML file
A_TN3270 = 'T'
A_MIME = 'M'
A_IMAGE = 'I'
A_WHOIS = 'w'
A_QUERY = 'q'
A_GIF = 'g'
# NOTE: A_WWW uses the same code as A_WHOIS.  Both names are kept so that
# existing callers keep working, but a code -> name lookup can only report
# one of them for 'w'.
A_WWW = 'w'  # WWW address
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'
40
41
# Snapshot of the names defined in this module so far; type_to_name() scans
# it for the A_* constants the first time it needs the reverse map.
_names = dir()
_type_to_name_map = {}
def type_to_name(gtype):
    """Map all file types to strings; unknown types become TYPE='x'.

    gtype is a type code such as '0' or '1'.  The result is the name of
    the matching A_* constant without its 'A_' prefix, or the string
    'TYPE=<repr of gtype>' when no constant has that value.
    """
    if not _type_to_name_map:
        # Build the code -> name map lazily.  The constants are looked up
        # directly in the module namespace instead of being eval()'ed.
        module_vars = globals()
        for name in _names:
            if name[:2] == 'A_':
                _type_to_name_map[module_vars[name]] = name[2:]
    if gtype in _type_to_name_map:
        return _type_to_name_map[gtype]
    return 'TYPE=%r' % (gtype,)
Guido van Rossum23acc951994-02-21 16:36:04 +000054
# Symbolic names for the separators used when talking to a server:
# CRLF ends a request line and is stripped from reply lines,
# TAB separates the fields of a directory entry.
CRLF = '\r\n'
TAB = '\t'
58
def send_selector(selector, host, port = 0):
    """Send a selector to a given host and port, return a file with the reply.

    selector -- the selector string; CRLF is appended before sending.
    host     -- host name; when no port is given it may have the form
                'host:port', in which case the port is taken from it.
    port     -- port number as an integer or a numeric string; a false
                value means "use the port embedded in host, else DEF_PORT".

    Returns the file object produced by socket.makefile('rb').  If
    connecting or sending fails, the socket is closed before the
    exception is re-raised, so no descriptor is leaked.
    """
    import socket
    if not port:
        i = host.find(':')
        if i >= 0:
            host, port = host[:i], int(host[i+1:])
    if not port:
        port = DEF_PORT
    elif isinstance(port, str):
        port = int(port)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(selector + CRLF)
        # Half-close: nothing more will be written, the reply is still
        # readable through the returned file.
        s.shutdown(socket.SHUT_WR)
        return s.makefile('rb')
    except:
        # Cleanup only; the original exception is always re-raised.
        s.close()
        raise
Guido van Rossum23acc951994-02-21 16:36:04 +000075
def send_query(selector, query, host, port = 0):
    """Send a selector and a query string.

    The selector and the query are joined by a tab character and handed
    to send_selector(); its reply file is returned unchanged.
    """
    request = selector + '\t' + query
    return send_selector(request, host, port)
Guido van Rossum23acc951994-02-21 16:36:04 +000079
def path_to_selector(path):
    """Takes a path as returned by urlparse and returns the appropriate selector.

    The root path "/" is returned as is; for any other path the first two
    characters (the leading slash and the data type identifier) are dropped.
    """
    if path != "/":
        return path[2:]
    return "/"
Guido van Rossumd2dd9a81998-01-19 21:59:48 +000086
def path_to_datatype_name(path):
    """Takes a path as returned by urlparse and maps it to a string.

    The character after the leading slash is the data type identifier and
    is translated with type_to_name().  The bare root path "/" carries no
    identifier, so a fixed placeholder is returned for it.
    See section 3.4 of RFC 1738 for details.
    """
    if path != "/":
        return type_to_name(path[1])
    # No way to tell, although "INDEX" is likely
    return "TYPE='unknown'"
Guido van Rossum8ca84201998-03-26 20:56:10 +000095
Guido van Rossum23acc951994-02-21 16:36:04 +000096# The following functions interpret the data returned by the gopher
97# server according to the expected type, e.g. textfile or directory
98
def get_directory(f):
    """Get a directory in the form of a list of entries.

    f is a file-like object with a readline() method.  Lines are read
    until a line consisting of a single '.' or end of file.  Each entry is
    a list: the type character followed by the tab-separated fields of the
    line; lines with exactly four fields get an empty fifth field appended.
    Empty lines and lines with fewer than four fields are skipped.
    Diagnostics about unexpected input are printed to standard output.
    """
    listing = []
    while True:
        raw = f.readline()
        if not raw:
            print('(Unexpected EOF from server)')
            break
        # Drop the line terminator: a full CRLF, or a lone CR or LF.
        if raw[-2:] == CRLF:
            text = raw[:-2]
        elif raw[-1:] in CRLF:
            text = raw[:-1]
        else:
            text = raw
        if text == '.':
            break
        if not text:
            print('(Empty line from server)')
            continue
        fields = text[1:].split(TAB)
        count = len(fields)
        if count < 4:
            print('(Bad line from server: %r)' % (text,))
            continue
        if count == 4:
            fields.append('')
        elif fields[4:] != ['+']:
            # A single trailing '+' field is accepted silently; anything
            # else beyond the fourth field is reported.
            print(' '.join(['(Extra info from server:', str(fields[4:]), ')']))
        listing.append([text[0]] + fields)
    return listing
Guido van Rossum23acc951994-02-21 16:36:04 +0000130
def get_textfile(f):
    """Get a text file as a list of lines, with trailing CRLF stripped.

    Thin wrapper around get_alt_textfile() that collects every line it
    delivers into a list and returns that list.
    """
    collected = []
    get_alt_textfile(f, collected.append)
    return collected
Guido van Rossum23acc951994-02-21 16:36:04 +0000136
def get_alt_textfile(f, func):
    """Get a text file and pass each line to a function, with trailing CRLF stripped.

    f is a file-like object with a readline() method; func is called once
    per line.  Reading stops at a line consisting of a single '.' or at
    end of file (which is reported on standard output).  A line starting
    with '..' has its first dot removed before being passed on.
    """
    while True:
        raw = f.readline()
        if not raw:
            print('(Unexpected EOF from server)')
            break
        # Drop the line terminator: a full CRLF, or a lone CR or LF.
        if raw[-2:] == CRLF:
            text = raw[:-2]
        elif raw[-1:] in CRLF:
            text = raw[:-1]
        else:
            text = raw
        if text == '.':
            break
        if text[:2] == '..':
            text = text[1:]
        func(text)
Guido van Rossum23acc951994-02-21 16:36:04 +0000153
def get_binary(f):
    """Get a binary file as one solid data block.

    Returns whatever a single f.read() call with no size argument yields.
    """
    return f.read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000158
def get_alt_binary(f, func, blocksize):
    """Get a binary file and pass each block to a function.

    Reads f in chunks of at most blocksize and calls func(chunk) for each
    one, stopping at the first empty read.
    """
    chunk = f.read(blocksize)
    while chunk:
        func(chunk)
        chunk = f.read(blocksize)
Guido van Rossum23acc951994-02-21 16:36:04 +0000166
def test():
    """Trivial test program.

    Positional command line arguments, all optional:

        host  type-or-selector  [selector]  [query]

    If the second argument is longer than one character it is used as the
    complete selector and its first character as the type; otherwise it
    is the type and the next argument (if any) is the selector.  The
    remaining argument, if any, is the query, which is only sent when the
    type is A_INDEX.  The reply is printed according to its type.
    """
    import sys
    import getopt
    # No options are accepted; only the positional arguments are used.
    args = getopt.getopt(sys.argv[1:], '')[1]
    selector = DEF_SELECTOR
    gtype = selector[0]
    host = DEF_HOST
    # Bound up front so the A_INDEX branch below can never see it unset.
    query = ''
    if args:
        host = args[0]
        args = args[1:]
    if args:
        gtype = args[0]
        args = args[1:]
        if len(gtype) > 1:
            gtype, selector = gtype[0], gtype
        else:
            selector = ''
            if args:
                selector = args[0]
                args = args[1:]
        if args:
            query = args[0]
            args = args[1:]
    if gtype == A_INDEX:
        f = send_query(selector, query, host)
    else:
        f = send_selector(selector, host)
    try:
        if gtype == A_TEXT:
            for item in get_textfile(f):
                print(item)
        elif gtype in (A_MENU, A_INDEX):
            for item in get_directory(f):
                print(item)
        else:
            data = get_binary(f)
            print(' '.join(['binary data:', str(len(data)), 'bytes:',
                            repr(data[:100])[:40]]))
    finally:
        # Release the reply file even if reading or printing fails.
        f.close()
Guido van Rossum23acc951994-02-21 16:36:04 +0000205
# When executed as a script (rather than imported), run the test program.
if __name__ == "__main__":
    test()