blob: 10c77d014da61e6f65e01680b43a0f60edb17aaf [file] [log] [blame]
Guido van Rossum06981c31997-01-31 18:58:12 +00001#! /usr/bin/env python
2
3"""GUI interface to webchecker.
4
5This works as a Grail applet too! E.g.
6
Guido van Rossum4f6ecda1997-02-01 05:17:29 +00007 <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
Guido van Rossum06981c31997-01-31 18:58:12 +00008
Guido van Rossumaf310c11997-02-02 23:30:32 +00009Checkpoints are not (yet??? ever???) supported.
Guido van Rossum06981c31997-01-31 18:58:12 +000010
11User interface:
12
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000013Enter a root to check in the text entry box. To enter more than one root,
Guido van Rossumaf310c11997-02-02 23:30:32 +000014enter them one at a time and press <Return> for each one.
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000015
Command buttons Run, Stop and "Check one" govern the checking process in
the obvious way.  Run and "Check one" also enter the root from the text
entry box if one is present.  There's also a check box (enabled by default)
to decide whether actually to follow external links (since this can slow
the checking down considerably).  The "Start over" button resets the
checker and clears the list panels.  Finally there's a Quit button.
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000021
22A series of checkbuttons determines whether the corresponding output panel
23is shown. List panels are also automatically shown or hidden when their
status changes between empty and non-empty.  There are six panels:
Guido van Rossum06981c31997-01-31 18:58:12 +000025
26Log -- raw output from the checker (-v, -q affect this)
Guido van Rossumaf310c11997-02-02 23:30:32 +000027To check -- links discovered but not yet checked
28Checked -- links that have been checked
Guido van Rossum06981c31997-01-31 18:58:12 +000029Bad links -- links that failed upon checking
Guido van Rossumaf310c11997-02-02 23:30:32 +000030Errors -- pages containing at least one bad link
Guido van Rossum06981c31997-01-31 18:58:12 +000031Details -- details about one URL; double click on a URL in any of
Guido van Rossumaf310c11997-02-02 23:30:32 +000032 the above list panels (not in Log) will show details
33 for that URL
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000034
Guido van Rossum06981c31997-01-31 18:58:12 +000035Use your window manager's Close command to quit.
36
37Command line options:
38
39-m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
Guido van Rossum06981c31997-01-31 18:58:12 +000040-q -- quiet operation (also suppresses external links report)
41-v -- verbose operation; repeating -v will increase verbosity
Guido van Rossum06981c31997-01-31 18:58:12 +000042
43Command line arguments:
44
45rooturl -- URL to start checking
46 (default %(DEFROOT)s)
47
Guido van Rossumaf310c11997-02-02 23:30:32 +000048XXX The command line options (-m, -q, -v) should be GUI accessible.
Guido van Rossum06981c31997-01-31 18:58:12 +000049
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000050XXX The roots should be visible as a list (?).
Guido van Rossum06981c31997-01-31 18:58:12 +000051
Guido van Rossumaf310c11997-02-02 23:30:32 +000052XXX The multipanel user interface is clumsy.
Guido van Rossum06981c31997-01-31 18:58:12 +000053
54"""
55
56# ' Emacs bait
57
58
59import sys
60import getopt
61import string
62from Tkinter import *
63import tktools
64import webchecker
65import random
66
# On the (classic) Mac platform, replace some of the webchecker module
# defaults with values better suited to a weaker machine.
if sys.platform == 'mac':
    webchecker.verbose = 4
    webchecker.MAXPAGE = 50000
    webchecker.DEFROOT = "http://grail.cnri.reston.va.us/"
Guido van Rossum06981c31997-01-31 18:58:12 +000072
def main():
    """Parse the command line, build the checker window and run Tk.

    Options:
        -m bytes  sets webchecker.maxpage
        -q        sets webchecker.verbose to 0
        -v        increments webchecker.verbose (may be repeated)

    Every positional argument except the last is added as a root right
    away; the last one is only suggested in the entry box.

    A malformed command line -- an unknown option, a missing option
    argument, or a non-numeric -m value -- prints the problem followed by
    the usage text on stderr and exits with status 2.
    """
    problem = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'm:qv')
    except getopt.error as msg:
        problem = msg
    else:
        for o, a in opts:
            if o == '-m':
                try:
                    webchecker.maxpage = int(a)
                except ValueError:
                    # Report this like any other usage error instead of
                    # letting the traceback escape.
                    problem = "option -m requires an integer, not %r" % (a,)
                    break
            if o == '-q':
                webchecker.verbose = 0
            if o == '-v':
                webchecker.verbose = webchecker.verbose + 1
    if problem is not None:
        # Write straight to stderr; sys.stdout is left untouched.
        sys.stderr.write("%s\n" % (problem,))
        sys.stderr.write("%s\n" % (__doc__ % vars(webchecker),))
        sys.exit(2)
    root = Tk(className='Webchecker')
    # Closing the window through the window manager ends the main loop.
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    if args:
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    root.mainloop()
96
97
class CheckerWindow(webchecker.Checker):

    """Tk front end for webchecker.Checker.

    The window holds a root URL entry, a row of command buttons, two
    status lines and a MultiPanel with the Log, To check, Checked,
    Bad links, Pages w/ bad links and Details panels.  The Checker
    bookkeeping hooks (setbad, setgood, newlink, markdone, seterror) are
    extended so that the list panels and the status line follow the
    checker's state.
    """

    # Re-entrancy guard for dosomething(): that method calls update() on
    # the parent, which can dispatch another dosomething() callback.
    __busy = 0

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Build all widgets inside parent and suggest root, if given.

        Note that sys.stdout is rebound to the Log panel here, so that
        everything printed afterwards shows up in that panel.
        """
        self.__parent = parent

        # First row: the root URL entry.
        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        # Second row: the command buttons.
        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar()
        self.__cv.set(1)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      text="Check nonlocal links")
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over",
                              command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        # Status lines and the output panels.
        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self.showinfo)
        self.__done = ListPanel(mp, "Checked", self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self.showinfo)
        self.__details = LogPanel(mp, "Details")

        # The panels must exist before the base class is initialized.
        webchecker.Checker.__init__(self)
        del self.checkext               # See __getattr__ below
        if root:
            root = str(root).strip()
            if root:
                self.suggestroot(root)
        self.newstatus()

    def __getattr__(self, name):
        """Serve the 'checkext' attribute from the check box variable.

        The instance attribute of that name is deleted in __init__, so
        lookups of it end up here; any other missing attribute raises
        AttributeError as usual.
        """
        if name != 'checkext':
            raise AttributeError(name)
        return self.__cv.get()

    def reset(self):
        """Reset the checker and empty the four list panels."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()

    def suggestroot(self, root):
        """Put root in the entry box, selected, without adding it."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)

    def enterroot(self, event=None):
        """Add the URL in the entry box as a root, then clear the box.

        Also bound to <Return> in the entry, hence the optional event.
        If the new root appears in the "To check" panel it becomes the
        selected item there.
        """
        root = self.__rootentry.get().strip()
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            try:
                i = self.__todo.items.index(root)
            except (ValueError, IndexError):
                pass
            else:
                self.__todo.list.select_clear(0, END)
                self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
        self.__rootentry.delete(0, END)

    def start(self):
        """Run button: enter the pending root and check continuously."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Stop button: end the run loop; go() restores the buttons."""
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """Check one button: enter the pending root and check one URL."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next check if running, else return to idle."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    def dosomething(self):
        """Check one URL from the "To check" panel, then call go().

        The selected item is checked, or the first item if nothing is
        selected.  With nothing left to do the run loop is stopped.

        If the check raises, the busy guard is still released, the run
        loop is stopped (so the failing URL is not retried forever) and
        the buttons are restored before the exception propagates.
        """
        if self.__busy:
            return
        self.__busy = 1
        completed = 0
        try:
            if self.todo:
                l = self.__todo.selectedindices()
                if l:
                    i = l[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(text="Checking "+url)
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
            completed = 1
        finally:
            self.__busy = 0
            if not completed:
                self.__running = 0
            self.go()

    def showinfo(self, url):
        """Fill the Details panel with what is known about url and show it."""
        d = self.__details
        d.clear()
        d.put("URL: %s\n" % url)
        if url in self.bad:
            d.put("Error: %s\n" % str(self.bad[url]))
        if url in self.roots:
            d.put("Note: This is a root URL\n")
        # o collects the (source, rawlink) pairs that refer to this URL.
        if url in self.done:
            d.put("Status: checked\n")
            o = self.done[url]
        elif url in self.todo:
            d.put("Status: to check\n")
            o = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            o = []
        if url in self.errors:
            d.put("Bad links from this page:\n")
            for triple in self.errors[url]:
                link, rawlink, msg = triple
                d.put(" HREF %s" % link)
                if link != rawlink:
                    d.put(" (%s)" % rawlink)
                d.put("\n")
                d.put(" error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in o:
            d.put("Origin: %s" % source)
            if rawlink != url:
                d.put(" (%s)" % rawlink)
            d.put("\n")
        # put() scrolls to the end; go back to the top of the report.
        d.text.yview("1.0")

    def setbad(self, url, msg):
        """Checker hook: also list url in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Checker hook: also drop url from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Checker hook: also list url under "Checked" or "To check"."""
        webchecker.Checker.newlink(self, url, origin)
        if url in self.done:
            self.__done.insert(url)
        elif url in self.todo:
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Checker hook: also move url from "To check" to "Checked"."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Checker hook: also list url in the "Pages w/ bad links" panel."""
        webchecker.Checker.seterror(self, url, triple)
        self.__errors.insert(url)
        self.newstatus()

    def newstatus(self):
        """Refresh the status line and let Tk process pending events."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()
301
Guido van Rossum06981c31997-01-31 18:58:12 +0000302
class ListPanel:

    """A MultiPanel panel holding a list box of unique URLs.

    self.items mirrors the contents of the list box, in the same order.
    The panel is shown when the first item arrives and hidden again when
    the list becomes empty.  If a showinfo callback is given, a double
    click on an item calls it with that item's text.
    """

    def __init__(self, mp, name, showinfo=None):
        self.mp = mp
        self.name = name
        self.showinfo = showinfo
        self.panel = mp.addpanel(name)
        self.list, self.frame = tktools.make_list_box(
            self.panel, width=60, height=5)
        self.list.config(exportselection=0)
        if showinfo:
            self.list.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Remove all items and hide the panel."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Event handler: show details for the first selected item."""
        l = self.selectedindices()
        if l:
            self.showinfo(self.list.get(l[0]))

    def selectedindices(self):
        """Return the selected list box indices as a list of ints.

        curselection() may report indices as strings or as ints; int()
        accepts both.
        """
        return [int(index) for index in self.list.curselection() or ()]

    def insert(self, url):
        """Append url unless it is already listed; show a non-empty panel."""
        if url in self.items:
            return
        if not self.items:
            self.mp.showpanel(self.name)
        # (I tried sorting alphabetically, but the display is too jumpy)
        i = len(self.items)
        self.list.insert(i, url)
        self.list.yview(i)
        self.items.insert(i, url)

    def remove(self, url):
        """Remove url if listed; hide the panel when it becomes empty.

        If the removed item was selected, the selection moves to the item
        now at that position (or to the last item).
        """
        try:
            i = self.items.index(url)
        except (ValueError, IndexError):
            return
        was_selected = i in self.selectedindices()
        self.list.delete(i)
        del self.items[i]
        if not self.items:
            self.mp.hidepanel(self.name)
        elif was_selected:
            if i >= len(self.items):
                i = len(self.items) - 1
            self.list.select_set(i)
Guido van Rossum06981c31997-01-31 18:58:12 +0000357
358
class LogPanel:

    """A MultiPanel panel holding a non-wrapping text box.

    Because it has a write() method, an instance can stand in for a
    file object such as sys.stdout.
    """

    def __init__(self, mp, name):
        self.mp, self.name = mp, name
        self.panel = mp.addpanel(name)
        textbox, frame = tktools.make_text_box(self.panel, height=10)
        self.text = textbox
        self.frame = frame
        textbox.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the top."""
        widget = self.text
        widget.delete("1.0", END)
        widget.yview("1.0")

    def put(self, s):
        """Append s; scroll to the end whenever s contains a newline."""
        widget = self.text
        widget.insert(END, s)
        if '\n' in s:
            widget.yview(END)

    def write(self, s):
        """File-like variant of put() that also updates the display."""
        self.put(s)
        self.panel.update()
Guido van Rossum06981c31997-01-31 18:58:12 +0000382
383
class MultiPanel:

    """A stack of named panels, each toggled by its own check button.

    The check buttons sit in a raised bar at the top; the panels that
    are switched on share the area below it.  self.panels maps a panel
    name to a (variable, checkbutton, frame) tuple and self.panelnames
    remembers the order in which the panels were added.
    """

    def __init__(self, parent):
        self.parent = parent
        self.panelnames = []
        self.panels = {}
        self.frame = Frame(self.parent)
        self.frame.pack(expand=1, fill=BOTH)
        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
        self.topframe.pack(fill=X)
        self.botframe = Frame(self.frame)
        self.botframe.pack(expand=1, fill=BOTH)

    def addpanel(self, name, on=0):
        """Create a panel plus its check button; return the panel frame.

        The panel is displayed right away only if on is true.
        """
        # The variable holds the panel name when on, "" when off.
        if on:
            initial = name
        else:
            initial = ""
        var = StringVar()
        var.set(initial)
        button = Checkbutton(self.topframe, text=name,
                             offvalue="", onvalue=name, variable=var,
                             command=self.checkpanel)
        button.pack(side=LEFT)
        body = Frame(self.botframe)
        heading = Label(body, text=name, borderwidth=2, relief=RAISED,
                        anchor=W)
        heading.pack(side=TOP, fill=X)
        self.panelnames.append(name)
        self.panels[name] = (var, button, body)
        if on:
            body.pack(expand=1, fill=BOTH)
        return body

    def showpanel(self, name):
        """Switch the named panel on and display it."""
        var, button, body = self.panels[name]
        var.set(name)
        body.pack(expand=1, fill=BOTH)

    def hidepanel(self, name):
        """Switch the named panel off and remove it from the display."""
        var, button, body = self.panels[name]
        var.set("")
        body.pack_forget()

    def checkpanel(self):
        """Check button callback: repack the enabled panels in order."""
        entries = [self.panels[name] for name in self.panelnames]
        # First take every panel out, then put the enabled ones back so
        # that they appear in the order in which they were added.
        for var, button, body in entries:
            body.pack_forget()
        for var, button, body in entries:
            if var.get():
                body.pack(expand=1, fill=BOTH)
435
436
# Script entry point.  (Per the module docstring this file can also be
# loaded as a Grail applet; in that case main() is not run from here.)
if __name__ == '__main__':
    main()