Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 2 | ####################################################################### |
| 3 | # Newslist $Revision$ |
| 4 | # |
| 5 | # Syntax: |
| 6 | # newslist [ -a ] |
| 7 | # |
| 8 | # This is a program to create a directory full of HTML pages |
| 9 | # which between them contain links to all the newsgroups available |
| 10 | # on your server. |
| 11 | # |
| 12 | # The -a option causes a complete list of all groups to be read from |
| 13 | # the server rather than just the ones which have appeared since last |
| 14 | # execution. This recreates the local list from scratch. Use this on |
Guido van Rossum | e6b7979 | 1994-05-27 13:33:17 +0000 | [diff] [blame] | 15 | # the first invocation of the program, and from time to time thereafter. |
# When new groups are first created they may appear on your server as
# empty groups. By default, empty groups are ignored by the -a option.
# However, these new groups will not be created again, and so will not
# appear in the server's list of 'new groups' at a later date. Hence they
# won't appear until you do a '-a' after some articles have appeared.
| 21 | # |
| 22 | # I should really keep a list of ignored empty groups and re-check them |
| 23 | # for articles on every run, but I haven't got around to it yet. |
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 24 | # |
| 25 | # This assumes an NNTP news feed. |
| 26 | # |
| 27 | # Feel free to copy, distribute and modify this code for |
| 28 | # non-commercial use. If you make any useful modifications, let me |
| 29 | # know! |
| 30 | # |
| 31 | # (c) Quentin Stafford-Fraser 1994 |
| 32 | # fraser@europarc.xerox.com qs101@cl.cam.ac.uk |
| 33 | # # |
| 34 | ####################################################################### |
| 35 | import sys,nntplib, string, marshal, time, os, posix, string |
| 36 | |
#######################################################################
# Check these variables before running!                               #

# Top directory.
# Filenames which don't start with / are taken as being relative to this.
topdir='/anfs/qsbigdisc/web/html/newspage'

# The name of your NNTP host
# eg.
#    newshost = 'nntp-serv.cl.cam.ac.uk'
# or use following to get the name from the NNTPSERVER environment
# variable:
#    newshost = posix.environ['NNTPSERVER']
newshost = 'nntp-serv.cl.cam.ac.uk'

# The filename for a local cache of the newsgroup list
treefile = 'grouptree'

# The filename for descriptions of newsgroups
# I found a suitable one at ftp.uu.net in /uunet-info/newgroups.gz
# You can set this to '' if you don't wish to use one.
descfile = 'newsgroups'

# The directory in which HTML pages should be created
# eg.
#   pagedir  = '/usr/local/lib/html/newspage'
#   pagedir  = 'pages'
pagedir = topdir

# The html prefix which will refer to this directory
# eg.
#   httppref = '/newspage/',
# or leave blank for relative links between pages: (Recommended)
#   httppref = ''
httppref = ''

# The name of the 'root' news page in this directory.
# A .html suffix will be added.
rootpage = 'root'

# Set skipempty to 0 if you wish to see links to empty groups as well.
# Only affects the -a option.
skipempty = 1

# pagelinkicon can contain html to put an icon after links to
# further pages. This helps to make important links stand out.
# Set to '' if not wanted, or '...' is quite a good one.
pagelinkicon='... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# Sublistsize controls the maximum number of items that will appear as
# an indented sub-list before the whole thing is moved onto a different
# page. The smaller this is, the more pages you will have, but the
# shorter each will be.
sublistsize = 4

# That should be all.                                                 #
#######################################################################
| 97 | |
# Optional overrides: the first '.newslistrc.py' found in the current
# directory or in the home directory is executed in this module's
# namespace, so it can reassign any of the settings above.
# os.environ.get() is used so that a missing HOME variable no longer
# aborts the script with a KeyError.
# NOTE(security): this runs arbitrary Python from the current working
# directory; only run newslist from directories you trust.
for rcdir in os.curdir, os.environ.get('HOME', os.path.expanduser('~')):
    rcfile = os.path.join(rcdir, '.newslistrc.py')
    if os.path.exists(rcfile):
        print(rcfile)
        rcsource = open(rcfile)
        try:
            # Portable spelling of execfile(rcfile).
            exec(compile(rcsource.read(), rcfile, 'exec'))
        finally:
            rcsource.close()
        break
Guido van Rossum | 9af22a0 | 1994-08-19 15:02:57 +0000 | [diff] [blame] | 104 | |
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 105 | from nntplib import NNTP |
| 106 | from stat import * |
| 107 | |
# Version text shown in the page footer: the RCS keyword with every
# word containing '$' removed.
rcsrev = '$Revision$'
rcsrev = ' '.join([word for word in rcsrev.split() if '$' not in word])

# Group name -> description; filled in by readdesc().
desc = {}

# Make (possibly) relative filenames into absolute ones
treefile = os.path.join(topdir, treefile)
if descfile:
    # Leave '' untouched: joining it would give 'topdir/' and defeat the
    # "no description file" check in readdesc().
    descfile = os.path.join(topdir, descfile)
# The joined path used to be stored only in the unused name 'page', so a
# relative pagedir was resolved against the current directory instead of
# topdir.  'page' is kept as an alias for backward compatibility.
pagedir = os.path.join(topdir, pagedir)
page = pagedir
| 116 | |
| 117 | # First the bits for creating trees --------------------------- |
| 118 | |
| 119 | # Addtotree creates/augments a tree from a list of group names |
def addtotree(tree, groups):
    """Add every dotted group name in *groups* to *tree* (in place).

    tree   -- nested dictionary of hierarchies, modified by makeleaf().
    groups -- iterable of newsgroup names such as 'comp.lang.python'.
    """
    print('Updating tree...')
    for group in groups:
        # str.split replaces the deprecated string.splitfields().
        makeleaf(tree, group.split('.'))
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 125 | |
| 126 | # Makeleaf makes a leaf and the branch leading to it if necessary |
def makeleaf(tree, path):
    """Create the branch described by *path* in *tree* and mark its leaf.

    tree -- nested dictionary, modified in place.
    path -- list of name components, e.g. ['comp', 'lang', 'python'].

    Each component becomes a nested dictionary key; the innermost
    dictionary receives the entry '.' -> '.', which marks it as a real
    newsgroup.  An empty *path* is ignored instead of raising IndexError.
    """
    if not path:
        return
    node = tree
    for part in path:
        # Reuse an existing sub-hierarchy or start a new one.
        node = node.setdefault(part, {})
    node['.'] = '.'
| 137 | |
Guido van Rossum | 4117e54 | 1998-09-14 16:44:15 +0000 | [diff] [blame] | 138 | # Then the bits for outputting trees as pages ---------------- |
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 139 | |
| 140 | # Createpage creates an HTML file named <root>.html containing links |
| 141 | # to those groups beginning with <root>. |
| 142 | |
def createpage(root, tree, p):
    """Write the HTML page '<root>.html' listing the groups in *tree*.

    root -- page name without the '.html' suffix; it is mentioned in the
            title unless it equals the module-level ``rootpage``.
    tree -- nested dictionary of groups as built by makeleaf().
    p    -- dotted prefix passed on unchanged to printtree().

    The file is created in the module-level ``pagedir``.
    """
    filename = os.path.join(pagedir, root + '.html')
    if root == rootpage:
        detail = ''
    else:
        detail = ' under ' + root
    # 'with' closes the page even when printtree() raises part-way.
    with open(filename, 'w') as f:
        # f.write('Content-Type: text/html\n')
        f.write('<TITLE>Newsgroups available' + detail + '</TITLE>\n')
        f.write('<H1>Newsgroups available' + detail +'</H1>\n')
        f.write('<A HREF="'+httppref+rootpage+'.html">Back to top level</A><P>\n')
        printtree(f, tree, 0, p)
        f.write('<I>This page automatically created by \'newslist\' v. '+rcsrev+'.')
        f.write(time.ctime(time.time()) + '</I><P>')
| 158 | |
| 159 | # Printtree prints the groups as a bulleted list. Groups with |
| 160 | # more than <sublistsize> subgroups will be put on a separate page. |
| 161 | # Other sets of subgroups are just indented. |
| 162 | |
def _htmlescape(text):
    """Return *text* with the HTML-special characters &, < and > escaped."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def printtree(f, tree, indent, p):
    """Write *tree* to the open file *f* as (nested) HTML bullet lists.

    f      -- file object the HTML is written to.
    tree   -- nested dictionary of groups; the key '.' marks a newsgroup.
    indent -- nesting depth; 0 for the top-level list of a page.
    p      -- dotted name of this hierarchy with a leading '.', or ''
              for the top of the tree.

    A hierarchy below the top level with more than ``sublistsize``
    entries gets a page of its own via createpage() and only a link
    here.  Descriptions are looked up in the module-level ``desc``.
    """
    name = p[1:]                # drop the leading '.'
    count = len(tree)

    if count > sublistsize and indent > 0:
        # Create a new page and a link to it
        f.write('<LI><B><A HREF="'+httppref+name+'.html">')
        f.write(name+'.*')
        f.write('</A></B>'+pagelinkicon+'\n')
        createpage(name, tree, p)
        return

    # sorted() works for both lists and dict views; sorting a single
    # key is a no-op, so this matches the old "sort only if > 1".
    keys = sorted(tree)

    if count > 1:
        if indent > 0:
            # Create a sub-list
            f.write('<LI>'+name+'\n<UL>')
        else:
            # Create a main list
            f.write('<UL>')
        indent = indent + 1

    for key in keys:
        if key == '.':
            # Output a newsgroup
            f.write('<LI><A HREF="news:' + name + '">'+ name + '</A> ')
            if name in desc:
                # Descriptions come from an external file: escape them so
                # stray '<' or '&' characters cannot break the page.
                f.write(' <I>'+_htmlescape(desc[name])+'</I>\n')
            else:
                f.write('\n')
        else:
            # Output a hierarchy
            printtree(f, tree[key], indent, p+'.'+key)

    if count > 1:
        f.write('\n</UL>')
| 201 | |
| 202 | # Reading descriptions file --------------------------------------- |
| 203 | |
| 204 | # This returns an array mapping group name to its description |
| 205 | |
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global ``desc``.

    descfile -- path of a text file whose lines look like
                '<group> <description words...>'; '' disables loading.

    Returns the (possibly empty) group -> description dictionary, which
    is also stored in the module-level ``desc``.  Blank lines and
    descriptions of at most one character are skipped.  If the file
    cannot be opened a message is printed and the mapping stays empty.
    """
    global desc

    desc = {}

    if descfile == '':
        return desc

    try:
        d = open(descfile, 'r')
    except (IOError, OSError):
        print('Failed to open description file ' + descfile)
        return desc

    print('Reading descriptions...')
    try:
        for line in d:
            bits = line.split()
            if not bits:
                continue        # blank line
            dsc = ' '.join(bits[1:])
            if len(dsc) > 1:
                desc[bits[0]] = dsc
    finally:
        # The handle used to be left open.
        d.close()
    return desc
| 231 | |
# Check that output directory exists, -----------------------------
# and offer to create it if not
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 234 | |
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it does not, ask on stdin whether to create it.  The script exits
    with status 1 (SystemExit) when the user declines, when stdin is at
    end-of-file, or when the directory cannot be created.
    """
    if os.path.isdir(pagedir):
        return

    print('Directory '+pagedir+' does not exist.')
    print('Shall I create it for you? (y/n)')
    # [:1] rather than [0]: readline() returns '' at EOF, and indexing
    # that raised IndexError.  EOF now counts as "no".
    if sys.stdin.readline()[:1] == 'y':
        try:
            os.mkdir(pagedir, 0o777)
        except OSError:
            print('Sorry - failed!')
            sys.exit(1)
    else:
        print('OK. Exiting.')
        sys.exit(1)
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 248 | |
Guido van Rossum | e6b7979 | 1994-05-27 13:33:17 +0000 | [diff] [blame] | 249 | # Read and write current local tree ---------------------------------- |
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 250 | |
def readlocallist(treefile):
    """Load the cached group tree from *treefile*.

    Returns (tree, treedate): the dictionary read with marshal, and the
    cache file's local modification date formatted as 'YYMMDD'.

    Exits with status 1 if the file cannot be stat'ed.  If it can be
    stat'ed but not opened, a message is printed and an empty tree is
    returned together with the date.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        treetime = time.localtime(os.path.getmtime(treefile))
    except OSError:
        print('\n*** Failed to open local group cache '+treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treedate = '%02d%02d%02d' % (treetime[0] % 100, treetime[1], treetime[2])
    try:
        # marshal data is binary, so the file must be opened in 'rb'.
        dump = open(treefile, 'rb')
    except (IOError, OSError):
        print('Cannot open local group list ' + treefile)
    else:
        try:
            tree = marshal.load(dump)
        finally:
            dump.close()
    return (tree, treedate)
Guido van Rossum | b25c402 | 1994-05-27 13:32:41 +0000 | [diff] [blame] | 269 | |
def writelocallist(treefile, tree):
    """Save *tree* to the cache file *treefile* using marshal.

    Exits with status 1 (after printing a hint) if the file cannot be
    written or the tree cannot be marshalled.
    """
    try:
        # marshal data is binary, so the file must be opened in 'wb'.
        dump = open(treefile, 'wb')
        try:
            marshal.dump(tree, dump)
        finally:
            dump.close()
    except (IOError, OSError, ValueError):
        print('Sorry - failed to write to local group cache '+treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
    print('Saved list to '+treefile+'\n')
| 280 | |
| 281 | # Return list of all groups on server ----------------------------- |
| 282 | |
def getallgroups(server, skip_empty=None):
    """Return the names of all groups reported by ``server.list()``.

    server     -- object whose list() method returns a pair whose second
                  item is a sequence of entries indexable as
                  (name, last, first, ...).
    skip_empty -- when true, groups whose 'last' number is lower than
                  their 'first' number are treated as empty, printed and
                  left out.  Defaults to the module-level ``skipempty``.
    """
    if skip_empty is None:
        skip_empty = skipempty

    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skip_empty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        grpname = entry[0].split()[0]
        if skip_empty and int(entry[1]) < int(entry[2]):
            # Same spacing as the old "print grpname+' '," statement.
            sys.stdout.write(grpname + '  ')
        else:
            groups.append(grpname)
    print('\n')
    if skip_empty:
        print('(End of empty groups)')
    return groups
| 301 | |
| 302 | # Return list of new groups on server ----------------------------- |
| 303 | |
def getnewgroups(server, treedate):
    """Return the names of groups created on *server* since *treedate*.

    server   -- object with a newgroups(date, time) method returning a
                pair whose second item is a list of text lines.
    treedate -- date string handed to newgroups() together with the
                time '000001'.

    The first whitespace-separated field of each line is taken as the
    group name.
    """
    # sys.stdout.write keeps the progress text on one line, as the old
    # trailing-comma print statements did.
    sys.stdout.write('Getting list of new groups since start of '+treedate+'... ')
    info = server.newgroups(treedate, '000001')[1]
    print('got %d.' % len(info))
    sys.stdout.write('Processing... ')
    groups = [line.split()[0] for line in info]
    print('Done')
    return groups
| 315 | |
| 316 | # Now the main program -------------------------------------------- |
| 317 | |
def main():
    """Refresh the local group tree and regenerate the HTML pages.

    With '-a' as first argument (and a working connection) the complete
    group list is fetched from the server; otherwise the cached tree is
    loaded and only groups created since the cache was last modified are
    added.  If the server cannot be reached, the pages are rebuilt from
    the cached tree alone.
    """
    tree = {}

    # Check that the output directory exists
    checkopdir(pagedir)

    server = None
    print('Connecting to '+newshost+'...')
    try:
        server = NNTP(newshost)
    except (nntplib.error_temp, nntplib.error_perm,
            IOError, OSError, EOFError) as x:
        # Socket-level failures (unknown host, connection refused) are
        # caught too, so the local-list fallback really is used.
        print('Error connecting to host: %s' % (x,))
        print('I\'ll try to use just the local list.')
    connected = server is not None

    try:
        # If -a is specified, read the full list of groups from server
        if connected and len(sys.argv) > 1 and sys.argv[1] == '-a':
            groups = getallgroups(server)

        # Otherwise just read the local file and then add
        # groups created since local file last modified.
        else:
            (tree, treedate) = readlocallist(treefile)
            if connected:
                groups = getnewgroups(server, treedate)
    finally:
        if connected:
            # Best effort: a failed goodbye must not lose the results.
            try:
                server.quit()
            except (nntplib.error_temp, nntplib.error_perm,
                    IOError, OSError, EOFError):
                pass

    if connected:
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')


if __name__ == '__main__':
    main()
| 364 | |
| 365 | # That's all folks |
| 366 | ###################################################################### |