blob: 4635f1d8d6e3c200ba9f2ca48471215a4be31b65 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossumb25c4021994-05-27 13:32:41 +00002#######################################################################
3# Newslist $Revision$
4#
5# Syntax:
6# newslist [ -a ]
7#
Tim Peterse6ddc8b2004-07-18 05:56:09 +00008# This is a program to create a directory full of HTML pages
Guido van Rossumb25c4021994-05-27 13:32:41 +00009# which between them contain links to all the newsgroups available
10# on your server.
11#
Tim Peterse6ddc8b2004-07-18 05:56:09 +000012# The -a option causes a complete list of all groups to be read from
Guido van Rossumb25c4021994-05-27 13:32:41 +000013# the server rather than just the ones which have appeared since last
14# execution. This recreates the local list from scratch. Use this on
Guido van Rossume6b79791994-05-27 13:33:17 +000015# the first invocation of the program, and from time to time thereafter.
Tim Peterse6ddc8b2004-07-18 05:56:09 +000016# When new groups are first created they may appear on your server as
Guido van Rossume6b79791994-05-27 13:33:17 +000017# empty groups. By default, empty groups are ignored by the -a option.
# However, these new groups will not be created again, and so will not
# appear in the server's list of 'new groups' at a later date. Hence they
# won't appear until you do a '-a' after some articles have appeared.
Tim Peterse6ddc8b2004-07-18 05:56:09 +000021#
Guido van Rossume6b79791994-05-27 13:33:17 +000022# I should really keep a list of ignored empty groups and re-check them
23# for articles on every run, but I haven't got around to it yet.
Guido van Rossumb25c4021994-05-27 13:32:41 +000024#
25# This assumes an NNTP news feed.
26#
Tim Peterse6ddc8b2004-07-18 05:56:09 +000027# Feel free to copy, distribute and modify this code for
28# non-commercial use. If you make any useful modifications, let me
Guido van Rossumb25c4021994-05-27 13:32:41 +000029# know!
30#
31# (c) Quentin Stafford-Fraser 1994
32# fraser@europarc.xerox.com qs101@cl.cam.ac.uk
33# #
34#######################################################################
35import sys,nntplib, string, marshal, time, os, posix, string
36
37#######################################################################
38# Check these variables before running! #
39
# Top directory.
# Filenames which don't start with / are taken as being relative to this.
topdir='/anfs/qsbigdisc/web/html/newspage'

# The name of your NNTP host
# eg.
#    newshost = 'nntp-serv.cl.cam.ac.uk'
# or use following to get the name from the NNTPSERVER environment
# variable:
#    newshost = posix.environ['NNTPSERVER']
newshost = 'nntp-serv.cl.cam.ac.uk'

# The filename for a local cache of the newsgroup list
treefile = 'grouptree'

# The filename for descriptions of newsgroups
# I found a suitable one at ftp.uu.net in /uunet-info/newgroups.gz
# You can set this to '' if you don't wish to use one.
descfile = 'newsgroups'

# The directory in which HTML pages should be created
# eg.
#    pagedir = '/usr/local/lib/html/newspage'
#    pagedir = 'pages'
pagedir = topdir

# The html prefix which will refer to this directory
# eg.
#    httppref = '/newspage/'
# or leave blank for relative links between pages: (Recommended)
#    httppref = ''
httppref = ''

# The name of the 'root' news page in this directory.
# A .html suffix will be added.
rootpage = 'root'

# Set skipempty to 0 if you wish to see links to empty groups as well.
# Only affects the -a option.
skipempty = 1

# pagelinkicon can contain html to put an icon after links to
# further pages. This helps to make important links stand out.
# Set to '' if not wanted, or '...' is quite a good one.
pagelinkicon='... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# Sublistsize controls the maximum number of items that will appear as
# an indented sub-list before the whole thing is moved onto a different
# page.  The smaller this is, the more pages you will have, but the
# shorter each will be.
sublistsize = 4
94
95# That should be all. #
96#######################################################################
97
def load_rcfile(search_dirs, namespace):
    """Execute the first '.newslistrc.py' found in *search_dirs*.

    The directories are tried in order; the first one that contains a
    '.newslistrc.py' wins and the rest are ignored.  The file is run as
    Python source with *namespace* as its globals, so any names it assigns
    end up in (and may override entries of) that dictionary.

    Returns the path of the file that was executed, or None if no
    directory contained one.

    NOTE: this runs arbitrary code from the current directory and the
    home directory by design; only use it where both are trusted.
    """
    for directory in search_dirs:
        rcfile = os.path.join(directory, '.newslistrc.py')
        if os.path.exists(rcfile):
            print(rcfile)
            with open(rcfile) as source:
                code = compile(source.read(), rcfile, 'exec')
            # exec() replaces execfile(), which no longer exists in Python 3.
            exec(code, namespace)
            return rcfile
    return None

# expanduser('~') honours $HOME when it is set but does not raise KeyError
# when it is missing (e.g. under cron or on non-POSIX platforms).
load_rcfile((os.curdir, os.path.expanduser('~')), globals())
Guido van Rossum9af22a01994-08-19 15:02:57 +0000104
Guido van Rossumb25c4021994-05-27 13:32:41 +0000105from nntplib import NNTP
106from stat import *
107
# Version string shown in the page footer: the RCS keyword with every
# '$'-bearing word removed (so an unexpanded keyword yields '').
rcsrev = '$Revision$'
rcsrev = ' '.join(word for word in rcsrev.split() if '$' not in word)

# Group name -> description; filled in by readdesc().
desc = {}

# Make (possibly) relative filenames into absolute ones
treefile = os.path.join(topdir, treefile)
if descfile:
    # Leave '' untouched so that "no description file" stays recognisable;
    # joining it would produce the top directory itself.
    descfile = os.path.join(topdir, descfile)
# pagedir is what the page-writing code actually uses, so it is the name
# that must become absolute.  'page' is kept as an alias because the
# original code bound the joined path to that name.
pagedir = os.path.join(topdir, pagedir)
page = pagedir
116
117# First the bits for creating trees ---------------------------
118
119# Addtotree creates/augments a tree from a list of group names
def addtotree(tree, groups):
    """Insert every group name in *groups* into the nested dict *tree*.

    Each name is split on '.' and handed to makeleaf(), which creates the
    branch and leaf as needed.  *tree* is modified in place; nothing is
    returned.
    """
    print('Updating tree...')
    for group in groups:
        # str.split replaces string.splitfields, which Python 3 removed.
        makeleaf(tree, group.split('.'))
Guido van Rossumb25c4021994-05-27 13:32:41 +0000125
126# Makeleaf makes a leaf and the branch leading to it if necessary
def makeleaf(tree, path):
    """Create the branch described by *path* in *tree* and mark its leaf.

    *path* is a non-empty sequence of name components.  A nested dict is
    created (or reused) for each component in turn, and the innermost one
    receives the marker entry '.' -> '.' to flag it as a real newsgroup.
    An empty *path* raises IndexError.
    """
    # Walk down one level per component, creating missing levels on the way.
    node = tree.setdefault(path[0], {})
    for component in path[1:]:
        node = node.setdefault(component, {})
    node['.'] = '.'
Guido van Rossumb25c4021994-05-27 13:32:41 +0000137
Tim Peterse6ddc8b2004-07-18 05:56:09 +0000138# Then the bits for outputting trees as pages ----------------
Guido van Rossumb25c4021994-05-27 13:32:41 +0000139
140# Createpage creates an HTML file named <root>.html containing links
141# to those groups beginning with <root>.
142
def createpage(root, tree, p):
    """Write the HTML page '<root>.html' in pagedir for the subtree *tree*.

    root -- page name without suffix; the page named rootpage gets a plain
            title, any other page is titled "... under <root>".
    tree -- nested dict of groups to list on this page.
    p    -- dotted prefix (with leading '.') passed through to printtree().

    The page contains a title, a link back to the root page, the group
    list produced by printtree() and a footer with version and timestamp.
    """
    filename = os.path.join(pagedir, root + '.html')
    detail = '' if root == rootpage else ' under ' + root
    # 'with' guarantees the page is closed even if printtree() (which may
    # recursively create further pages) raises part-way through.
    with open(filename, 'w') as f:
        # f.write('Content-Type: text/html\n')
        f.write('<TITLE>Newsgroups available' + detail + '</TITLE>\n')
        f.write('<H1>Newsgroups available' + detail +'</H1>\n')
        f.write('<A HREF="'+httppref+rootpage+'.html">Back to top level</A><P>\n')
        printtree(f, tree, 0, p)
        f.write('<I>This page automatically created by \'newslist\' v. '+rcsrev+'.')
        f.write(time.ctime(time.time()) + '</I><P>')
Guido van Rossumb25c4021994-05-27 13:32:41 +0000158
159# Printtree prints the groups as a bulleted list. Groups with
160# more than <sublistsize> subgroups will be put on a separate page.
161# Other sets of subgroups are just indented.
162
def printtree(f, tree, indent, p):
    """Write the groups in *tree* to the open file *f* as an HTML list.

    f      -- writable file object for the current page.
    tree   -- nested dict; the key '.' marks an actual newsgroup.
    indent -- current list nesting depth (0 at the top of a page).
    p      -- dotted name of this subtree with a leading '.', e.g. '.comp'.

    A nested subtree with more than sublistsize entries is moved to its
    own page via createpage() and only linked from here.
    """
    name = p[1:]            # dotted name without the leading '.'
    count = len(tree)

    if count > sublistsize and indent > 0:
        # Too big for an inline sub-list: link to a separate page instead.
        f.write('<LI><B><A HREF="' + httppref + name + '.html">')
        f.write(name + '.*')
        f.write('</A></B>' + pagelinkicon + '\n')
        createpage(name, tree, p)
        return

    as_list = count > 1
    if as_list:
        # Nested levels get a labelled sub-list, the page top a plain list.
        opener = ('<LI>' + name + '\n<UL>') if indent > 0 else '<UL>'
        f.write(opener)
        indent += 1

    for key in sorted(tree):
        if key != '.':
            # A deeper hierarchy level
            printtree(f, tree[key], indent, p + '.' + key)
            continue
        # The newsgroup itself, followed by its description if known
        f.write('<LI><A HREF="news:' + name + '">' + name + '</A> ')
        if name in desc:
            f.write(' <I>' + desc[name] + '</I>\n')
        else:
            f.write('\n')

    if as_list:
        f.write('\n</UL>')
Guido van Rossumb25c4021994-05-27 13:32:41 +0000201
202# Reading descriptions file ---------------------------------------
203
204# This returns an array mapping group name to its description
205
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global desc.

    Each line is expected to hold a group name followed by whitespace and a
    free-text description.  Runs of whitespace inside the description are
    collapsed to single spaces.  Lines without a description, or whose
    description is a single character, are ignored.

    The global desc is always reset first.  If *descfile* is '' or cannot
    be opened, desc is left empty (a message is printed in the latter
    case).  The resulting mapping is also returned for convenience.
    """
    global desc

    desc = {}

    if descfile == '':
        return desc

    try:
        # Description files come from third parties and are not reliably
        # UTF-8; replace undecodable bytes rather than abort.
        d = open(descfile, 'r', errors='replace')
    except OSError:
        print('Failed to open description file ' + descfile)
        return desc

    print('Reading descriptions...')
    with d:
        for line in d:
            bits = line.split()
            if not bits:
                continue            # blank line
            dsc = ' '.join(bits[1:])
            if len(dsc) > 1:
                desc[bits[0]] = dsc
    return desc
Guido van Rossumb25c4021994-05-27 13:32:41 +0000231
# Check that output directory exists, -----------------------------
# and offer to create it if not
Guido van Rossumb25c4021994-05-27 13:32:41 +0000234
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it already exists nothing happens.  Otherwise the user is asked on
    stdin whether it should be created.  An answer starting with 'y'
    creates it with mode 0o777.  Any other answer, including end of
    input, or a failure to create the directory ends the program with
    exit status 1.
    """
    if os.path.isdir(pagedir):
        return

    print('Directory '+pagedir+' does not exist.')
    print('Shall I create it for you? (y/n)')
    # startswith() copes with EOF (empty string), where indexing [0]
    # would raise IndexError.
    if not sys.stdin.readline().startswith('y'):
        print('OK. Exiting.')
        sys.exit(1)

    try:
        os.mkdir(pagedir, 0o777)
    except OSError:
        print('Sorry - failed!')
        sys.exit(1)
Guido van Rossumb25c4021994-05-27 13:32:41 +0000248
Guido van Rossume6b79791994-05-27 13:33:17 +0000249# Read and write current local tree ----------------------------------
Guido van Rossumb25c4021994-05-27 13:32:41 +0000250
def readlocallist(treefile):
    """Load the cached group tree from *treefile*.

    Returns a tuple (tree, treedate).  tree is the unmarshalled object
    from the cache; treedate is the cache file's modification date in
    local time, formatted 'YYMMDD'.

    If the file cannot be stat'ed the program exits with status 1 after
    advising the user to run with -a.  If it exists but cannot be opened,
    a message is printed and an empty tree is returned together with the
    date.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        mtime = os.stat(treefile).st_mtime
    except OSError:
        print('\n*** Failed to open local group cache '+treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treetime = time.localtime(mtime)
    treedate = '%02d%02d%02d' % (treetime[0] % 100, treetime[1], treetime[2])
    try:
        # marshal data is binary; a text-mode file makes marshal.load fail
        # on Python 3.
        with open(treefile, 'rb') as dump:
            tree = marshal.load(dump)
    except OSError:
        print('Cannot open local group list ' + treefile)
    return (tree, treedate)
Guido van Rossumb25c4021994-05-27 13:32:41 +0000269
def writelocallist(treefile, tree):
    """Save *tree* to the cache file *treefile* using marshal.

    On success a confirmation is printed.  If the file cannot be written,
    or *tree* contains something marshal cannot serialise, an error is
    printed and the program exits with status 1.
    """
    try:
        # marshal writes bytes, so the file must be opened in binary mode;
        # in text mode every dump fails on Python 3.
        with open(treefile, 'wb') as dump:
            marshal.dump(tree, dump)
    except (OSError, ValueError):
        print('Sorry - failed to write to local group cache '+treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
    print('Saved list to '+treefile+'\n')
Guido van Rossume6b79791994-05-27 13:33:17 +0000280
281# Return list of all groups on server -----------------------------
282
def getallgroups(server):
    """Return the names of all groups reported by server.list().

    server -- object whose list() method returns a pair whose second item
              is a sequence of entries; entry[0] is the group name and
              entry[1] / entry[2] are compared as integers.

    When the global skipempty is true, groups with entry[1] < entry[2]
    are treated as empty: their names are printed and left out of the
    returned list.
    """
    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skipempty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        # str.split / int replace string.split / string.atoi, which
        # Python 3 removed.
        grpname = entry[0].split()[0]
        if skipempty and int(entry[1]) < int(entry[2]):
            print(grpname+' ', end=' ')
        else:
            groups.append(grpname)
    print('\n')
    if skipempty:
        print('(End of empty groups)')
    return groups
Guido van Rossume6b79791994-05-27 13:33:17 +0000301
302# Return list of new groups on server -----------------------------
303
def getnewgroups(server, treedate):
    """Return the names of groups created on the server since *treedate*.

    server   -- object whose newgroups(date, time) method returns a pair
                whose second item is a sequence of entries.
    treedate -- date string, passed to the server together with the fixed
                time '000001'.

    Each entry may be a text line beginning with the group name or a
    tuple-like record whose first item is the group name.  Blank entries
    are skipped.
    """
    print('Getting list of new groups since start of '+treedate+'...', end=' ')
    # NOTE(review): newer nntplib releases appear to expect a date object
    # rather than two strings here -- confirm against the installed library.
    info = server.newgroups(treedate,'000001')[1]
    print('got %d.' % len(info))
    print('Processing...', end=' ')
    groups = []
    for entry in info:
        line = entry if isinstance(entry, str) else entry[0]
        # str.split replaces string.split, which Python 3 removed.
        words = line.split()
        if words:
            groups.append(words[0])
    print('Done')
    return groups
Guido van Rossume6b79791994-05-27 13:33:17 +0000315
316# Now the main program --------------------------------------------
317
def main():
    """Refresh the local group tree and regenerate all HTML pages.

    Steps:
      1. Make sure the output directory exists (may prompt the user).
      2. Try to connect to the news server; on failure fall back to the
         locally cached tree only.
      3. With '-a' as first argument and a live connection, fetch the full
         group list; otherwise load the cached tree and, if connected, ask
         the server for groups created since the cache was last written.
      4. If connected, merge the fetched groups into the tree and save it.
      5. Read the group descriptions and write the pages, starting from
         the root page.
    """
    tree = {}
    groups = []

    # Check that the output directory exists
    checkopdir(pagedir)

    s = None
    try:
        print('Connecting to '+newshost+'...')
        s = NNTP(newshost)
        connected = 1
    except (nntplib.error_temp, nntplib.error_perm, OSError) as x:
        # OSError covers socket-level failures (unknown host, connection
        # refused), for which the local-list fallback is equally valid.
        print('Error connecting to host:', x)
        print('I\'ll try to use just the local list.')
        connected = 0

    try:
        # If -a is specified, read the full list of groups from server
        if connected and sys.argv[1:2] == ['-a']:
            groups = getallgroups(s)
        # Otherwise just read the local file and then add
        # groups created since local file last modified.
        else:
            (tree, treedate) = readlocallist(treefile)
            if connected:
                groups = getnewgroups(s, treedate)
    finally:
        if connected:
            # The server is not needed past this point.  Closing is best
            # effort: a failed goodbye must not lose the fetched data.
            try:
                s.quit()
            except (nntplib.NNTPError, OSError, EOFError):
                pass

    if connected:
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')

if __name__ == "__main__":
    main()
Guido van Rossumb25c4021994-05-27 13:32:41 +0000364
365# That's all folks
366######################################################################