#! /usr/bin/env python
#######################################################################
# Newslist  $Revision$
#
# Syntax:
#    newslist [ -a ]
#
# This is a program to create a directory full of HTML pages
# which between them contain links to all the newsgroups available
# on your server.
#
# The -a option causes a complete list of all groups to be read from
# the server rather than just the ones which have appeared since last
# execution. This recreates the local list from scratch. Use this on
# the first invocation of the program, and from time to time thereafter.
#   When new groups are first created they may appear on your server as
# empty groups. By default, empty groups are ignored by the -a option.
# However, these new groups will not be created again, and so will not
# appear in the server's list of 'new groups' at a later date. Hence they
# won't appear until you do a '-a' after some articles have appeared.
#
# I should really keep a list of ignored empty groups and re-check them
# for articles on every run, but I haven't got around to it yet.
#
# This assumes an NNTP news feed.
#
# Feel free to copy, distribute and modify this code for
# non-commercial use. If you make any useful modifications, let me
# know!
#
# (c) Quentin Stafford-Fraser 1994
# fraser@europarc.xerox.com                     qs101@cl.cam.ac.uk
#                                                                     #
#######################################################################
import html
import marshal
import nntplib
import os
import sys
import time
Guido van Rossumb25c4021994-05-27 13:32:41 +000036
#######################################################################
# Check these variables before running!                               #

# Top directory.
# Filenames which don't start with / are taken as being relative to this.
topdir = os.path.expanduser('~/newspage')

# The name of your NNTP host
# eg.
#    newshost = 'nntp-serv.cl.cam.ac.uk'
# or use following to get the name from the NNTPSERVER environment
# variable:
#    newshost = os.environ['NNTPSERVER']
newshost = 'news.example.com'

# The filename for a local cache of the newsgroup list
treefile = 'grouptree'

# The filename for descriptions of newsgroups
# I found a suitable one at ftp.uu.net in /uunet-info/newgroups.gz
# You can set this to '' if you don't wish to use one.
descfile = 'newsgroups'

# The directory in which HTML pages should be created
# eg.
#   pagedir = '/usr/local/lib/html/newspage'
#   pagedir = 'pages'
pagedir = topdir

# The html prefix which will refer to this directory
# eg.
#   httppref = '/newspage/'
# or leave blank for relative links between pages: (Recommended)
#   httppref = ''
httppref = ''

# The name of the 'root' news page in this directory.
# A .html suffix will be added.
rootpage = 'root'

# Set skipempty to 0 if you wish to see links to empty groups as well.
# Only affects the -a option.
skipempty = 1

# pagelinkicon can contain html to put an icon after links to
# further pages. This helps to make important links stand out.
# Set to '' if not wanted, or '...' is quite a good one.
pagelinkicon = '... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# Sublistsize controls the maximum number of items that will appear as
# an indented sub-list before the whole thing is moved onto a different
# page. The smaller this is, the more pages you will have, but the
# shorter each will be.
sublistsize = 4

# That should be all.                                                 #
#######################################################################
97
# Optional per-user overrides: the first '.newslistrc.py' found in the
# current directory or the home directory is executed, so it can rebind
# any of the settings above.
# NOTE: the rc file is run as Python code in this module's namespace;
# only run newslist from directories whose contents you trust.
# os.path.expanduser('~') is used instead of os.environ['HOME'] so that a
# missing HOME variable does not abort the script with a KeyError.
for rcdir in (os.curdir, os.path.expanduser('~')):
    rcfile = os.path.join(rcdir, '.newslistrc.py')
    if os.path.exists(rcfile):
        print(rcfile)
        # Close the file deterministically rather than leaking the handle.
        with open(rcfile) as rcfp:
            exec(rcfp.read())
        break
Guido van Rossum9af22a01994-08-19 15:02:57 +0000104
Guido van Rossumb25c4021994-05-27 13:32:41 +0000105from nntplib import NNTP
106from stat import *
107
# Version string shown in page footers; any unexpanded RCS keyword
# markers (words containing '$') are dropped.
rcsrev = '$Revision$'
rcsrev = ' '.join([s for s in rcsrev.split() if '$' not in s])

# Group name -> description; filled in by readdesc().
desc = {}

# Make (possibly) relative filenames into absolute ones
treefile = os.path.join(topdir, treefile)
# An empty descfile means "no description file"; joining it with topdir
# would turn it into a directory path and hide that from readdesc().
if descfile:
    descfile = os.path.join(topdir, descfile)
# The result must be stored back into pagedir: that is the name the rest
# of the script reads when creating pages.
pagedir = os.path.join(topdir, pagedir)
page = pagedir  # kept for backward compatibility with the old name
116
117# First the bits for creating trees ---------------------------
118
119# Addtotree creates/augments a tree from a list of group names
def addtotree(tree, groups):
    """Add every dotted group name in *groups* to the nested dict *tree*.

    Each name is split on '.' and handed to makeleaf(), which creates the
    branch and leaf in place.
    """
    print('Updating tree...')
    for groupname in groups:
        makeleaf(tree, groupname.split('.'))
Guido van Rossumb25c4021994-05-27 13:32:41 +0000125
126# Makeleaf makes a leaf and the branch leading to it if necessary
def makeleaf(tree, path):
    """Create the leaf for *path* in *tree*, plus any missing branch nodes.

    *path* is a non-empty list of name components. Every component gets a
    nested dict (created if absent); the node reached by the last
    component is marked as a leaf by setting its '.' key to '.'.
    Indexing an empty *path* raises IndexError.
    """
    # Walk down one level per component, creating nodes as needed.
    node = tree.setdefault(path[0], {})
    for component in path[1:]:
        node = node.setdefault(component, {})
    # Mark the final node as a leaf.
    node['.'] = '.'
Guido van Rossumb25c4021994-05-27 13:32:41 +0000137
Tim Peterse6ddc8b2004-07-18 05:56:09 +0000138# Then the bits for outputting trees as pages ----------------
Guido van Rossumb25c4021994-05-27 13:32:41 +0000139
140# Createpage creates an HTML file named <root>.html containing links
141# to those groups beginning with <root>.
142
def createpage(root, tree, p):
    """Write the HTML page '<root>.html' in pagedir for the groups in *tree*.

    root -- page name without the '.html' suffix; when it differs from
            rootpage it is also shown in the title as ' under <root>'.
    tree -- nested dict of groups to list on this page.
    p    -- dotted path prefix passed through to printtree().

    The page body is produced by printtree(), which may call back into
    this function to create further pages.
    """
    filename = os.path.join(pagedir, root + '.html')
    if root == rootpage:
        detail = ''
    else:
        # The prefix is placed in HTML text, so escape markup characters.
        detail = ' under ' + html.escape(root, quote=False)
    with open(filename, 'w') as f:
        f.writelines([
            '<html>\n<head>\n',
            '<title>Newsgroups available%s</title>\n' % detail,
            '</head>\n<body>\n',
            '<h1>Newsgroups available%s</h1>\n' % detail,
            '<a href="%s%s.html">Back to top level</a><p>\n' %
            (httppref, rootpage),
        ])
        printtree(f, tree, 0, p)
        f.write('\n<p>')
        # A space after the version keeps it from running into the date.
        f.write("<i>This page automatically created by 'newslist' v. %s. " %
                rcsrev)
        f.write(time.ctime() + '</i>\n')
        f.write('</body>\n</html>\n')
Guido van Rossumb25c4021994-05-27 13:32:41 +0000163
164# Printtree prints the groups as a bulleted list. Groups with
165# more than <sublistsize> subgroups will be put on a separate page.
166# Other sets of subgroups are just indented.
167
def printtree(f, tree, indent, p):
    """Write *tree* to the open file *f* as nested HTML bulleted lists.

    f      -- writable text file for the current page.
    tree   -- nested dict; the key '.' marks the node itself as a group.
    indent -- current list nesting depth (0 at the top of a page).
    p      -- dotted path of this node with a leading '.', or '' at the
              root; p[1:] is the group/hierarchy name.

    A node with more than sublistsize entries that is not at the top of a
    page is moved to its own page via createpage() and only linked here.
    Group names and descriptions are HTML-escaped before being written.
    """
    name = p[1:]
    name_text = html.escape(name, quote=False)   # for element content
    name_attr = html.escape(name)                # for attribute values
    count = len(tree)

    if count > sublistsize and indent > 0:
        # Create a new page and a link to it
        f.write('<li><b><a href="%s%s.html">' % (httppref, name_attr))
        f.write(name_text + '.*')
        f.write('</a></b>%s\n' % pagelinkicon)
        createpage(name, tree, p)
        return

    keys = sorted(tree)
    as_list = count > 1

    if as_list:
        if indent > 0:
            # Create a sub-list
            f.write('<li>%s\n<ul>' % name_text)
        else:
            # Create a main list
            f.write('<ul>')
        indent += 1

    for key in keys:
        if key == '.':
            # Output a newsgroup
            f.write('<li><a href="news:%s">%s</a> ' % (name_attr, name_text))
            if name in desc:
                f.write(' <i>%s</i>\n' % html.escape(desc[name], quote=False))
            else:
                f.write('\n')
        else:
            # Output a hierarchy
            printtree(f, tree[key], indent, p + '.' + key)

    if as_list:
        f.write('\n</ul>')
Guido van Rossumb25c4021994-05-27 13:32:41 +0000204
205# Reading descriptions file ---------------------------------------
206
Georg Brandl22fff432009-10-27 20:19:02 +0000207# This returns a dict mapping group name to its description
Guido van Rossumb25c4021994-05-27 13:32:41 +0000208
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global ``desc``.

    Each line is expected to hold a group name followed by its
    description, separated by whitespace. Blank lines and descriptions of
    at most one character are skipped.

    descfile -- path of the description file; an empty value means no
                file is used.

    Returns the (possibly empty) dict mapping group name to description,
    which is also stored in the module-level ``desc``. If the file cannot
    be opened a message is printed and the dict is left empty.
    """
    global desc
    desc = {}

    if not descfile:
        return desc

    try:
        # errors='replace' keeps one undecodable byte in a description
        # from aborting the whole run.
        with open(descfile, 'r', errors='replace') as d:
            print('Reading descriptions...')
            for line in d:
                bits = line.split()
                if not bits:
                    continue
                dsc = ' '.join(bits[1:])
                if len(dsc) > 1:
                    desc[bits[0]] = dsc
    except OSError:
        print('Failed to open description file ' + descfile)
    return desc
Guido van Rossumb25c4021994-05-27 13:32:41 +0000231
# Check that output directory exists, -----------------------------
# and offer to create it if not
Guido van Rossumb25c4021994-05-27 13:32:41 +0000234
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it is missing, the user is asked on stdin whether to create it.
    Any answer not starting with 'y'/'Y' (including end-of-file) or a
    failure to create the directory terminates the program via
    sys.exit(1).
    """
    if os.path.isdir(pagedir):
        return

    print('Directory %s does not exist.' % pagedir)
    print('Shall I create it for you? (y/n)')
    # startswith() copes with an empty string at EOF, where indexing [0]
    # would raise IndexError.
    answer = sys.stdin.readline().strip().lower()
    if not answer.startswith('y'):
        print('OK. Exiting.')
        sys.exit(1)
    try:
        # makedirs also creates missing parent directories.
        os.makedirs(pagedir, 0o777)
    except OSError:
        print('Sorry - failed!')
        sys.exit(1)
Guido van Rossumb25c4021994-05-27 13:32:41 +0000248
Guido van Rossume6b79791994-05-27 13:33:17 +0000249# Read and write current local tree ----------------------------------
Guido van Rossumb25c4021994-05-27 13:32:41 +0000250
def readlocallist(treefile):
    """Load the cached group tree from *treefile*.

    Returns a tuple ``(tree, treedate)`` where *tree* is the unmarshalled
    nested dict and *treedate* is the file's modification date in local
    time formatted as 'yymmdd'.

    If the file cannot be stat'ed, a hint is printed and the program
    exits via sys.exit(1). If it exists but cannot be opened or decoded,
    a message is printed and an empty tree is returned with the date.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        treetime = time.localtime(os.stat(treefile).st_mtime)
    except OSError:
        print('\n*** Failed to open local group cache '+treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treedate = '%02d%02d%02d' % (treetime[0] % 100, treetime[1], treetime[2])
    try:
        with open(treefile, 'rb') as dump:
            tree = marshal.load(dump)
    except OSError:
        print('Cannot open local group list ' + treefile)
    except (EOFError, ValueError, TypeError):
        # marshal.load raises these for truncated or corrupt data.
        print('Cannot read local group list ' + treefile)
    return (tree, treedate)
Guido van Rossumb25c4021994-05-27 13:32:41 +0000268
def writelocallist(treefile, tree):
    """Save *tree* to *treefile* in marshal format.

    On success a confirmation is printed. If the file cannot be written
    (OSError) or the tree cannot be marshalled (ValueError), an
    explanation is printed and the program exits via sys.exit(1).
    """
    try:
        with open(treefile, 'wb') as dump:
            marshal.dump(tree, dump)
    except (OSError, ValueError):
        print('Sorry - failed to write to local group cache', treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
    else:
        print('Saved list to %s\n' % treefile)
Guido van Rossume6b79791994-05-27 13:33:17 +0000278
279# Return list of all groups on server -----------------------------
280
def getallgroups(server):
    """Return the names of all groups reported by ``server.list()``.

    Each entry of the listing is indexed as (name, last, first, ...).
    When the module-level ``skipempty`` is true, groups whose last
    article number is below their first are printed as ignored and left
    out of the result.

    Group names may arrive as bytes or as str depending on the nntplib
    version; bytes are decoded, str is used as is.
    """
    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skipempty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        grpname = entry[0].split()[0]
        if isinstance(grpname, bytes):
            grpname = grpname.decode()
        if skipempty and int(entry[1]) < int(entry[2]):
            print(grpname + ' ', end=' ')
        else:
            groups.append(grpname)
    print('\n')
    if skipempty:
        print('(End of empty groups)')
    return groups
Guido van Rossume6b79791994-05-27 13:33:17 +0000299
300# Return list of new groups on server -----------------------------
301
def getnewgroups(server, treedate):
    """Return the names of groups the server reports as new since *treedate*.

    *treedate* is passed to ``server.newgroups`` together with the time
    string '000001'. Each returned entry is split on whitespace and its
    first field is decoded to give the group name.
    """
    # NOTE(review): this is the two-argument string form of newgroups();
    # confirm it matches the nntplib version this script is run with.
    print('Getting list of new groups since start of %s...' % treedate, end=' ')
    reply = server.newgroups(treedate, '000001')
    entries = reply[1]
    print('got %d.' % len(entries))
    print('Processing...', end=' ')
    names = [entry.split()[0].decode() for entry in entries]
    print('Done')
    return names
Guido van Rossume6b79791994-05-27 13:33:17 +0000313
314# Now the main program --------------------------------------------
315
def main():
    """Refresh the group tree and regenerate the HTML pages.

    With '-a' as the first argument and a working server connection the
    full group list is fetched and the tree rebuilt from scratch.
    Otherwise the local cache is read and, if connected, groups created
    since the cache was last modified are added. If the server cannot be
    reached, pages are rebuilt from the local cache alone.
    """
    tree = {}

    # Check that the output directory exists
    checkopdir(pagedir)

    try:
        print('Connecting to %s...' % newshost)
        s = NNTP(newshost)
        connected = True
    except (nntplib.error_temp, nntplib.error_perm, OSError) as x:
        # OSError covers socket-level failures (unknown host, refused
        # connection) so those also fall back to the local list.
        print('Error connecting to host:', x)
        print('I\'ll try to use just the local list.')
        connected = False

    # If -a is specified, read the full list of groups from server
    fetch_all = connected and len(sys.argv) > 1 and sys.argv[1] == '-a'

    # Otherwise just read the local file and then add
    # groups created since local file last modified.
    if not fetch_all:
        (tree, treedate) = readlocallist(treefile)

    if connected:
        try:
            if fetch_all:
                groups = getallgroups(s)
            else:
                groups = getnewgroups(s, treedate)
        finally:
            # Best-effort close; a failure here must not hide an error
            # from the fetch or stop page generation.
            try:
                s.quit()
            except (nntplib.NNTPError, OSError, EOFError):
                pass
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')


if __name__ == "__main__":
    main()
Guido van Rossumb25c4021994-05-27 13:32:41 +0000359
360# That's all folks
361######################################################################