blob: 600082978a3b73834060ce17d203d375a18ab079 [file] [log] [blame]
Guido van Rossum06981c31997-01-31 18:58:12 +00001#! /usr/bin/env python
2
3"""GUI interface to webchecker.
4
5This works as a Grail applet too! E.g.
6
Guido van Rossum4f6ecda1997-02-01 05:17:29 +00007 <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
Guido van Rossum06981c31997-01-31 18:58:12 +00008
Guido van Rossumaf310c11997-02-02 23:30:32 +00009Checkpoints are not (yet??? ever???) supported.
Guido van Rossum06981c31997-01-31 18:58:12 +000010
11User interface:
12
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000013Enter a root to check in the text entry box. To enter more than one root,
Guido van Rossumaf310c11997-02-02 23:30:32 +000014enter them one at a time and press <Return> for each one.
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000015
Command buttons Run, Stop and "Check one" govern the checking process in
the obvious way.  Run and "Check one" also enter the root from the text
entry box if one is present.  The "Start over" button resets the checker
and empties the list panels.  There's also a check box, "Check nonlocal
links" (enabled by default), to decide whether to actually follow external
links (since this can slow the checking down considerably).  Finally
there's a Quit button (not shown when running as a Grail applet).
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000021
22A series of checkbuttons determines whether the corresponding output panel
23is shown. List panels are also automatically shown or hidden when their
status changes between empty and non-empty.  There are six panels:
Guido van Rossum06981c31997-01-31 18:58:12 +000025
26Log -- raw output from the checker (-v, -q affect this)
Guido van Rossumaf310c11997-02-02 23:30:32 +000027To check -- links discovered but not yet checked
28Checked -- links that have been checked
Guido van Rossum06981c31997-01-31 18:58:12 +000029Bad links -- links that failed upon checking
Guido van Rossumaf310c11997-02-02 23:30:32 +000030Errors -- pages containing at least one bad link
Guido van Rossum06981c31997-01-31 18:58:12 +000031Details -- details about one URL; double click on a URL in any of
Guido van Rossumaf310c11997-02-02 23:30:32 +000032 the above list panels (not in Log) will show details
33 for that URL
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000034
Guido van Rossum06981c31997-01-31 18:58:12 +000035Use your window manager's Close command to quit.
36
37Command line options:
38
39-m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
Guido van Rossum06981c31997-01-31 18:58:12 +000040-q -- quiet operation (also suppresses external links report)
41-v -- verbose operation; repeating -v will increase verbosity
Guido van Rossum06981c31997-01-31 18:58:12 +000042
43Command line arguments:
44
45rooturl -- URL to start checking
46 (default %(DEFROOT)s)
47
Guido van Rossumaf310c11997-02-02 23:30:32 +000048XXX The command line options (-m, -q, -v) should be GUI accessible.
Guido van Rossum06981c31997-01-31 18:58:12 +000049
Guido van Rossum4f6ecda1997-02-01 05:17:29 +000050XXX The roots should be visible as a list (?).
Guido van Rossum06981c31997-01-31 18:58:12 +000051
Guido van Rossumaf310c11997-02-02 23:30:32 +000052XXX The multipanel user interface is clumsy.
Guido van Rossum06981c31997-01-31 18:58:12 +000053
54"""
55
56# ' Emacs bait
57
58
59import sys
60import getopt
61import string
62from Tkinter import *
63import tktools
64import webchecker
65import random
66
# Override some webchecker defaults for a weaker platform.  This must run
# before CheckerWindow is defined (the default value of its `root` argument
# captures webchecker.DEFROOT at definition time) and before main() builds
# the usage text, which interpolates MAXPAGE and DEFROOT from the
# webchecker module namespace.
if sys.platform == 'mac':
    webchecker.DEFROOT = "http://grail.cnri.reston.va.us/"
    webchecker.MAXPAGE = 50000
    webchecker.verbose = 4
Guido van Rossum06981c31997-01-31 18:58:12 +000072
def _usage_exit(msg):
    """Print msg and the usage text (module docstring) to stderr; exit 2."""
    # Redirect stdout rather than writing to stderr directly, so anything
    # else printed on the way out ends up on stderr as well.
    sys.stdout = sys.stderr
    sys.stdout.write(str(msg) + "\n")
    sys.stdout.write(__doc__ % vars(webchecker) + "\n")
    sys.exit(2)


def main():
    """Parse the command line, build the checker window and run Tk.

    Options (see the module docstring): -m bytes, -q, -v.

    Every positional argument except the last is added as a root right
    away; the last one is only placed in the entry box, so the user can
    still edit it before confirming it with <Return>, Run or "Check one".

    Exits with status 2, after printing the problem and the usage text to
    stderr, when the options are malformed or the -m value is not an
    integer.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'm:qv')
    except getopt.error:
        # sys.exc_info() instead of the "except E, v" spelling, which
        # newer Python versions reject.
        _usage_exit(sys.exc_info()[1])
    for o, a in opts:
        if o == '-m':
            # NOTE(review): the value is stored in the *module* attribute
            # webchecker.maxpage; whether the checker reads that name is
            # not visible from this file -- confirm against webchecker.
            try:
                webchecker.maxpage = string.atoi(a)
            except ValueError:
                # A bad number is a usage error like any other bad option,
                # not a reason for a traceback.
                _usage_exit("-m requires an integer byte count, not %s"
                            % repr(a))
        if o == '-q':
            webchecker.verbose = 0
        if o == '-v':
            webchecker.verbose = webchecker.verbose + 1
    root = Tk(className='Webchecker')
    # Closing the window ends mainloop() instead of destroying the widgets.
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    if args:
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    root.mainloop()
96
97
class CheckerWindow(webchecker.Checker):

    """A webchecker.Checker that is driven from, and reports to, a Tk window.

    The window consists of a root URL entry, a row of command buttons, two
    status lines and a MultiPanel holding the Log, To check, Checked,
    Bad links, Pages w/ bad links and Details panels.

    The Checker hooks setbad, setgood, newlink, markdone and seterror are
    overridden so that every state change of the checker is mirrored in the
    corresponding list panel and in the status line.

    Creating an instance replaces sys.stdout with the Log panel, so all
    output printed afterwards shows up in the window.
    """

    # Re-entrancy guard for dosomething().  The update() calls made while a
    # page is being checked process pending Tk events, so a button press or
    # an idle callback could otherwise start a second check in the middle
    # of the first one.
    __busy = 0

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Build the user interface inside parent.

        parent -- the Tk widget that receives all the controls
        root   -- URL suggested in the entry box; a false or blank value
                  suggests nothing
        """
        self.__parent = parent

        # First row: the root URL entry.
        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        # Second row: the command buttons.
        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar(parent)
        # NOTE(review): self.checkext is read before Checker.__init__ has
        # run, so it is presumably a class attribute of Checker -- confirm.
        self.__cv.set(self.checkext)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      command=self.update_checkext,
                                      text="Check nonlocal links",)
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over",
                              command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        # Status lines and the output panels.
        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self.showinfo)
        self.__done = ListPanel(mp, "Checked", self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self.showinfo)
        self.__details = LogPanel(mp, "Details")

        # The base class is initialised only now that every widget exists
        # (presumably because it calls back into methods overridden here,
        # such as reset() -- confirm against webchecker.Checker).
        webchecker.Checker.__init__(self)
        if root:
            root = string.strip(str(root))
            if root:
                self.suggestroot(root)
        self.newstatus()

    def reset(self):
        """Reset the checker and empty (and hide) the four list panels."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()

    def suggestroot(self, root):
        """Put root in the entry box, selected, without adding it yet."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)

    def enterroot(self, event=None):
        """Add the URL in the entry box as a root and clear the entry box.

        Bound to <Return> in the entry (hence the unused event argument) and
        also called by start() and step().  A blank entry adds nothing.
        """
        root = self.__rootentry.get()
        root = string.strip(root)
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            # If the new root shows up in the "To check" list, select it so
            # that it is the next URL to be checked.
            try:
                i = self.__todo.items.index(root)
            except (ValueError, IndexError):
                pass
            else:
                self.__todo.list.select_clear(0, END)
                self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
        self.__rootentry.delete(0, END)

    def start(self):
        """Run button: check URLs continuously until stopped or finished."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Stop button: finish the current URL, then go idle.

        Also called by dosomething() when nothing is left to check.
        """
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """"Check one" button: check a single URL, then go idle."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next check if running, else restore the idle state."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    def dosomething(self):
        """Check one URL from the "To check" list, then call go().

        The selected URL is checked, or the first one when nothing is
        selected.  With nothing left to check the run is stopped.

        If the check raises, the run is halted and the buttons are restored
        before the exception propagates; without this the busy flag would
        stay set and the window could never check anything again.
        """
        if self.__busy:
            return
        self.__busy = 1
        failed = 1
        try:
            if self.todo:
                l = self.__todo.selectedindices()
                if l:
                    i = l[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(text="Checking "+url)
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
            failed = 0
        finally:
            self.__busy = 0
            if failed:
                # Do not reschedule: the same URL would most likely fail
                # again on the next idle callback.
                self.__running = 0
            self.go()

    def showinfo(self, url):
        """Fill the Details panel with what is known about url and show it.

        Called by the list panels when an entry is double-clicked.
        """
        d = self.__details
        d.clear()
        d.put("URL:    %s\n" % url)
        if self.bad.has_key(url):
            d.put("Error:  %s\n" % str(self.bad[url]))
        if url in self.roots:
            d.put("Note:   This is a root URL\n")
        # o becomes the sequence of (source, rawlink) pairs recorded for url.
        if self.done.has_key(url):
            d.put("Status: checked\n")
            o = self.done[url]
        elif self.todo.has_key(url):
            d.put("Status: to check\n")
            o = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            o = []
        if self.errors.has_key(url):
            d.put("Bad links from this page:\n")
            for triple in self.errors[url]:
                link, rawlink, msg = triple
                d.put(" HREF %s" % link)
                if link != rawlink: d.put(" (%s)" %rawlink)
                d.put("\n")
                d.put(" error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in o:
            d.put("Origin: %s" % source)
            if rawlink != url:
                d.put(" (%s)" % rawlink)
            d.put("\n")
        # put() scrolls to the end; jump back to the top of the report.
        d.text.yview("1.0")

    def setbad(self, url, msg):
        """Checker hook: also list url in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Checker hook: also drop url from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Checker hook: list url in whichever panel matches its state."""
        webchecker.Checker.newlink(self, url, origin)
        if self.done.has_key(url):
            self.__done.insert(url)
        elif self.todo.has_key(url):
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Checker hook: move url from "To check" to "Checked"."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Checker hook: also list url in the "Pages w/ bad links" panel."""
        webchecker.Checker.seterror(self, url, triple)
        self.__errors.insert(url)
        self.newstatus()

    def newstatus(self):
        """Refresh the status line and let Tk redraw and handle events."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()

    def update_checkext(self):
        """Checkbutton callback: copy the check box state to self.checkext."""
        self.checkext = self.__cv.get()
Guido van Rossum88b02cf1998-03-05 20:12:18 +0000300
Guido van Rossum06981c31997-01-31 18:58:12 +0000301
class ListPanel:

    """A MultiPanel panel showing a list of URLs in a listbox.

    self.items mirrors the listbox contents, in the same order.  The panel
    shows itself when the first URL is inserted and hides itself when the
    last one is removed.
    """

    def __init__(self, mp, name, showinfo=None):
        """Create the panel called name in the MultiPanel mp.

        showinfo, if given, is called with the URL of an entry when that
        entry is double-clicked.
        """
        self.mp = mp
        self.name = name
        self.showinfo = showinfo
        self.panel = mp.addpanel(name)
        self.list, self.frame = tktools.make_list_box(
            self.panel, width=60, height=5)
        # Keep this list's selection out of the X selection, so selecting
        # in one list does not clear the selection in another.
        self.list.config(exportselection=0)
        if showinfo:
            self.list.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Remove all URLs and hide the panel."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Event handler: report the first selected URL through showinfo."""
        l = self.selectedindices()
        if l:
            self.showinfo(self.list.get(l[0]))

    def selectedindices(self):
        """Return the indices of the selected entries as integers.

        Returns an empty list when nothing is selected.
        """
        l = self.list.curselection()
        if not l: return []
        # Depending on the Tkinter version curselection() yields strings or
        # integers; int() accepts both, whereas string.atoi() raises
        # TypeError when handed an integer.
        return map(int, l)

    def insert(self, url):
        """Append url to the list unless it is already present."""
        if url not in self.items:
            if not self.items:
                self.mp.showpanel(self.name)
            # (I tried sorting alphabetically, but the display is too jumpy)
            i = len(self.items)
            self.list.insert(i, url)
            self.list.yview(i)
            self.items.insert(i, url)

    def remove(self, url):
        """Remove url from the list; do nothing if it is not present.

        If the removed entry was selected, the selection moves to the entry
        that takes its place (or to the new last entry).
        """
        try:
            i = self.items.index(url)
        except (ValueError, IndexError):
            pass
        else:
            was_selected = i in self.selectedindices()
            self.list.delete(i)
            del self.items[i]
            if not self.items:
                self.mp.hidepanel(self.name)
            elif was_selected:
                if i >= len(self.items):
                    i = len(self.items) - 1
                self.list.select_set(i)
Guido van Rossum06981c31997-01-31 18:58:12 +0000356
357
class LogPanel:

    """A MultiPanel panel holding an unwrapped, scrolling text box.

    Having a write() method, an instance can stand in for sys.stdout.
    """

    def __init__(self, mp, name):
        """Create the panel called name in the MultiPanel mp."""
        self.mp, self.name = mp, name
        self.panel = mp.addpanel(name)
        box, frame = tktools.make_text_box(self.panel, height=10)
        self.text, self.frame = box, frame
        box.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the top."""
        box = self.text
        box.delete("1.0", END)
        box.yview("1.0")

    def put(self, s):
        """Append s; scroll to the end once a line has been completed."""
        box = self.text
        box.insert(END, s)
        if '\n' in s:
            box.yview(END)

    def write(self, s):
        """File-like variant of put() that also redraws after each line."""
        self.put(s)
        if '\n' in s:
            self.panel.update()
Guido van Rossum06981c31997-01-31 18:58:12 +0000381
382
class MultiPanel:

    """A stack of optional panels with a row of checkbuttons to toggle them.

    self.panelnames lists the panel names in creation order; self.panels
    maps each name to a (variable, checkbutton, panel frame) triple.  A
    panel is visible exactly when its variable holds the panel's name, and
    visible panels are always stacked in creation order.
    """

    def __init__(self, parent):
        """Create the checkbutton bar and the (empty) panel area in parent."""
        self.parent = parent
        self.frame = Frame(self.parent)
        self.frame.pack(expand=1, fill=BOTH)
        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
        self.topframe.pack(fill=X)
        self.botframe = Frame(self.frame)
        self.botframe.pack(expand=1, fill=BOTH)
        self.panelnames = []
        self.panels = {}

    def addpanel(self, name, on=0):
        """Add a panel called name and return its frame for the caller to fill.

        The panel starts out hidden unless on is true.
        """
        v = StringVar(self.parent)
        if on:
            v.set(name)
        else:
            v.set("")
        check = Checkbutton(self.topframe, text=name,
                            offvalue="", onvalue=name, variable=v,
                            command=self.checkpanel)
        check.pack(side=LEFT)
        panel = Frame(self.botframe)
        label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
        label.pack(side=TOP, fill=X)
        t = v, check, panel
        self.panelnames.append(name)
        self.panels[name] = t
        if on:
            panel.pack(expand=1, fill=BOTH)
        return panel

    def showpanel(self, name):
        """Make the panel called name visible and tick its checkbutton."""
        v, check, panel = self.panels[name]
        v.set(name)
        # Re-pack through checkpanel() instead of packing this panel
        # directly: a direct pack() would put it below every panel that is
        # already visible, regardless of creation order.
        self.checkpanel()

    def hidepanel(self, name):
        """Hide the panel called name and untick its checkbutton."""
        v, check, panel = self.panels[name]
        v.set("")
        panel.pack_forget()

    def checkpanel(self):
        """Re-pack the panels so that exactly the ticked ones are visible.

        Everything is unpacked first and then packed again in creation
        order, because the packing order determines the stacking order.
        """
        for name in self.panelnames:
            v, check, panel = self.panels[name]
            panel.pack_forget()
        for name in self.panelnames:
            v, check, panel = self.panels[name]
            if v.get():
                panel.pack(expand=1, fill=BOTH)
Guido van Rossum06981c31997-01-31 18:58:12 +0000434
435
# Run the command line interface only when executed as a script; when the
# module is loaded some other way (e.g. as a Grail applet, see the module
# docstring) CheckerWindow is instantiated by the host instead.
if __name__ == '__main__':
    main()