blob: d789161e60a2cc0f470aaeca8e0af288034cdd97 [file] [log] [blame]
"""Gopher protocol client interface."""

# Names exported by ``from gopherlib import *``.
__all__ = ["send_selector", "send_query"]

# The module is deprecated: warn at import time.  stacklevel=2 makes the
# warning refer to the caller's frame instead of this line.
import warnings
warnings.warn(
    "the gopherlib module is deprecated",
    DeprecationWarning,
    stacklevel=2
)
Georg Brandlb86a54f2006-02-17 11:29:04 +00008
# Default selector, host and port
DEF_SELECTOR = '1/'
DEF_HOST = 'gopher.micro.umn.edu'
DEF_PORT = 70

# Recognized file types.  Each A_* constant holds an item type code;
# type_to_name() builds its code -> name table from the A_* names, so every
# constant added here is picked up automatically.
A_TEXT       = '0'
A_MENU       = '1'
A_CSO        = '2'
A_ERROR      = '3'
A_MACBINHEX  = '4'
A_PCBINHEX   = '5'
A_UUENCODED  = '6'
A_INDEX      = '7'
A_TELNET     = '8'
A_BINARY     = '9'
A_DUPLICATE  = '+'
A_SOUND      = 's'
A_EVENT      = 'e'
A_CALENDAR   = 'c'
A_HTML       = 'h'          # HTML file (was defined twice; one definition kept)
A_TN3270     = 'T'
A_MIME       = 'M'
A_IMAGE      = 'I'
A_WHOIS      = 'w'
A_QUERY      = 'q'
A_GIF        = 'g'
# NOTE: A_WWW shares the code 'w' with A_WHOIS, so a reverse lookup of 'w'
# can only ever yield one of the two names.
A_WWW        = 'w'          # WWW address
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'
41
42
# Snapshot of the module namespace taken at this point of the file;
# type_to_name() scans it for A_* names the first time it is called.
_names = dir()
_type_to_name_map = {}


def type_to_name(gtype):
    """Map all file types to strings; unknown types become TYPE='x'.

    gtype is an item type code (the value of one of the A_* constants).
    Returns the name of the matching constant without its 'A_' prefix,
    e.g. 'TEXT', or 'TYPE=%r' % gtype when no constant has that value.
    When several constants share a value, the one that comes last in
    _names wins.
    """
    if not _type_to_name_map:
        # Build the reverse table lazily.  The constants are read directly
        # from the module namespace; eval() is not needed for a plain
        # name lookup.
        module_namespace = globals()
        for name in _names:
            if name[:2] == 'A_':
                _type_to_name_map[module_namespace[name]] = name[2:]
    if gtype in _type_to_name_map:
        return _type_to_name_map[gtype]
    return 'TYPE=%r' % (gtype,)
Guido van Rossum23acc951994-02-21 16:36:04 +000055
# Symbolic names for the line terminator and the field separator
CRLF = "\r\n"
TAB = "\t"
59
def send_selector(selector, host, port=0):
    """Send a selector to a given host and port, return a file with the reply.

    selector -- selector string; CRLF is appended before it is sent.
    host     -- server host name; may carry the port as 'host:port' when
                no explicit port is passed.
    port     -- port as an int or a numeric string.  When false, the port
                embedded in host is used, falling back to DEF_PORT.

    Returns the file object created by socket.makefile('rb').  Socket
    errors, and ValueError for a non-numeric port, propagate to the caller;
    in that case the socket is closed before the exception leaves.
    """
    import socket
    if not port:
        colon = host.find(':')
        if colon >= 0:
            host, port = host[:colon], int(host[colon+1:])
    if not port:
        port = DEF_PORT
    elif isinstance(port, str):
        port = int(port)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    reply = None
    try:
        s.connect((host, port))
        s.sendall(selector + CRLF)
        # Half-close: the request is complete, but the reply is still
        # to be read through the file object.
        s.shutdown(socket.SHUT_WR)
        reply = s.makefile('rb')
    finally:
        if reply is None:
            # Something above raised: do not leak the socket.
            s.close()
    return reply
Guido van Rossum23acc951994-02-21 16:36:04 +000076
def send_query(selector, query, host, port=0):
    """Send a selector and a query string.

    The query is appended to the selector after a tab character and the
    combined request is handed to send_selector(), whose result is
    returned unchanged.
    """
    request = selector + '\t' + query
    return send_selector(request, host, port)
Guido van Rossum23acc951994-02-21 16:36:04 +000080
def path_to_selector(path):
    """Takes a path as returned by urlparse and returns the appropriate selector.

    The bare root path "/" is returned as is; for any other path the first
    two characters are dropped.
    """
    if path != "/":
        # Skip the initial slash and the data type identifier
        return path[2:]
    return "/"
Guido van Rossumd2dd9a81998-01-19 21:59:48 +000087
def path_to_datatype_name(path):
    """Takes a path as returned by urlparse and maps it to a string.

    See section 3.4 of RFC 1738 for details.

    The second character of the path is taken as the item type and
    translated with type_to_name().  A path too short to contain a type
    character ("/", "" or any other single character) yields
    "TYPE='unknown'" instead of failing with IndexError.
    """
    if len(path) < 2:
        # No way to tell, although "INDEX" is likely
        return "TYPE='unknown'"
    return type_to_name(path[1])
Guido van Rossum8ca84201998-03-26 20:56:10 +000096
Guido van Rossum23acc951994-02-21 16:36:04 +000097# The following functions interpret the data returned by the gopher
98# server according to the expected type, e.g. textfile or directory
99
def get_directory(f):
    """Get a directory in the form of a list of entries.

    Lines are read from f until a line consisting of a single '.' or until
    end of file (the latter is reported with print).  Each entry is a list
    made of the first character of the line followed by the tab-separated
    fields of the rest; an empty string is appended when there are exactly
    four fields.  Empty lines and lines with fewer than four fields are
    reported with print and skipped.
    """
    listing = []
    while True:
        raw = f.readline()
        if not raw:
            print('(Unexpected EOF from server)')
            break
        # Remove the line terminator: a full CRLF, else a single CR or LF
        if raw[-2:] == CRLF:
            text = raw[:-2]
        elif raw[-1:] in CRLF:
            text = raw[:-1]
        else:
            text = raw
        if text == '.':
            break
        if not text:
            print('(Empty line from server)')
            continue
        fields = text[1:].split(TAB)
        if len(fields) < 4:
            print('(Bad line from server: %r)' % (text,))
            continue
        if len(fields) == 4:
            fields.append('')
        elif fields[4:] != ['+']:
            # Anything other than a lone '+' after the fourth field is
            # reported, but the entry is still kept.
            print(' '.join(['(Extra info from server:', str(fields[4:]), ')']))
        listing.append([text[0]] + fields)
    return listing
Guido van Rossum23acc951994-02-21 16:36:04 +0000131
def get_textfile(f):
    """Get a text file as a list of lines, with trailing CRLF stripped.

    The work is done by get_alt_textfile(), which appends every line it
    produces to the list returned here.
    """
    collected = []
    get_alt_textfile(f, collected.append)
    return collected
Guido van Rossum23acc951994-02-21 16:36:04 +0000137
def get_alt_textfile(f, func):
    """Get a text file and pass each line to a function, with trailing CRLF stripped.

    Reading stops at a line consisting of a single '.', or at end of file
    (which is reported with print).  A line that starts with '..' loses
    its first dot before it is passed to func.
    """
    while True:
        raw = f.readline()
        if not raw:
            print('(Unexpected EOF from server)')
            break
        # Remove the line terminator: a full CRLF, else a single CR or LF
        if raw[-2:] == CRLF:
            text = raw[:-2]
        elif raw[-1:] in CRLF:
            text = raw[:-1]
        else:
            text = raw
        if text == '.':
            break
        if text[:2] == '..':
            text = text[1:]
        func(text)
Guido van Rossum23acc951994-02-21 16:36:04 +0000154
def get_binary(f):
    """Get a binary file as one solid data block.

    Returns the result of a single unbounded f.read().
    """
    return f.read()
Guido van Rossum23acc951994-02-21 16:36:04 +0000159
def get_alt_binary(f, func, blocksize):
    """Get a binary file and pass each block to a function.

    f.read(blocksize) is called repeatedly and every non-empty result is
    passed to func; the first empty result ends the loop.
    """
    chunk = f.read(blocksize)
    while chunk:
        func(chunk)
        chunk = f.read(blocksize)
Guido van Rossum23acc951994-02-21 16:36:04 +0000167
def test():
    """Trivial test program.

    Command line: [host [type [selector [query]]]]

    host defaults to DEF_HOST and the selector to DEF_SELECTOR, whose first
    character is the default item type.  If the type argument is longer
    than one character it is used as the selector and its first character
    as the type; otherwise the next argument, if any, is the selector.
    The query is only sent for A_INDEX items.  The reply is printed as
    text lines, as directory entries, or as a short summary of the binary
    data, depending on the type.
    """
    import sys
    import getopt
    # No options are defined; getopt is only used to reject unknown ones.
    _opts, args = getopt.getopt(sys.argv[1:], '')
    selector = DEF_SELECTOR
    gtype = selector[0]      # named gtype so the builtin type() is not shadowed
    host = DEF_HOST
    query = ''
    if args:
        host = args[0]
        args = args[1:]
    if args:
        gtype = args[0]
        args = args[1:]
        if len(gtype) > 1:
            gtype, selector = gtype[0], gtype
        else:
            selector = ''
            if args:
                selector = args[0]
                args = args[1:]
        if args:
            query = args[0]
            args = args[1:]
    if gtype == A_INDEX:
        f = send_query(selector, query, host)
    else:
        f = send_selector(selector, host)
    try:
        if gtype == A_TEXT:
            for item in get_textfile(f):
                print(item)
        elif gtype in (A_MENU, A_INDEX):
            for item in get_directory(f):
                print(item)
        else:
            data = get_binary(f)
            print('%s %s %s %s' % ('binary data:', len(data), 'bytes:',
                                   repr(data[:100])[:40]))
    finally:
        # The reply file owns the connection; release it when done.
        f.close()
Guido van Rossum23acc951994-02-21 16:36:04 +0000206
# Script entry point: run the test program when executed directly
if __name__ == "__main__":
    test()