/Tools/webchecker/wcgui.py

http://unladen-swallow.googlecode.com/ · Python · 462 lines · 387 code · 52 blank · 23 comment · 62 complexity · b10e8411bfae872995c4df89e947aafd MD5 · raw file

  1. #! /usr/bin/env python
  2. """GUI interface to webchecker.
  3. This works as a Grail applet too! E.g.
  4. <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
  5. Checkpoints are not (yet??? ever???) supported.
  6. User interface:
  7. Enter a root to check in the text entry box. To enter more than one root,
  8. enter them one at a time and press <Return> for each one.
  9. Command buttons Start, Stop and "Check one" govern the checking process in
  10. the obvious way. Start and "Check one" also enter the root from the text
  11. entry box if one is present. There's also a check box (enabled by default)
  12. to decide whether actually to follow external links (since this can slow
  13. the checking down considerably). Finally there's a Quit button.
  14. A series of checkbuttons determines whether the corresponding output panel
  15. is shown. List panels are also automatically shown or hidden when their
  16. status changes between empty and non-empty. There are six panels:
  17. Log -- raw output from the checker (-v, -q affect this)
  18. To check -- links discovered but not yet checked
  19. Checked -- links that have been checked
  20. Bad links -- links that failed upon checking
  21. Errors -- pages containing at least one bad link
  22. Details -- details about one URL; double click on a URL in any of
  23. the above list panels (not in Log) will show details
  24. for that URL
  25. Use your window manager's Close command to quit.
  26. Command line options:
  27. -m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
  28. -q -- quiet operation (also suppresses external links report)
  29. -v -- verbose operation; repeating -v will increase verbosity
  30. -t root -- specify root dir which should be treated as internal (can repeat)
  31. -a -- don't check name anchors
  32. Command line arguments:
  33. rooturl -- URL to start checking
  34. (default %(DEFROOT)s)
  35. XXX The command line options (-m, -q, -v) should be GUI accessible.
  36. XXX The roots should be visible as a list (?).
  37. XXX The multipanel user interface is clumsy.
  38. """
  39. # ' Emacs bait
  40. import sys
  41. import getopt
  42. from Tkinter import *
  43. import tktools
  44. import webchecker
  45. # Override some for a weaker platform
  46. if sys.platform == 'mac':
  47. webchecker.DEFROOT = "http://grail.cnri.reston.va.us/"
  48. webchecker.MAXPAGE = 50000
  49. webchecker.verbose = 4
  50. def main():
  51. try:
  52. opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
  53. except getopt.error, msg:
  54. sys.stdout = sys.stderr
  55. print msg
  56. print __doc__%vars(webchecker)
  57. sys.exit(2)
  58. webchecker.verbose = webchecker.VERBOSE
  59. webchecker.nonames = webchecker.NONAMES
  60. webchecker.maxpage = webchecker.MAXPAGE
  61. extra_roots = []
  62. for o, a in opts:
  63. if o == '-m':
  64. webchecker.maxpage = int(a)
  65. if o == '-q':
  66. webchecker.verbose = 0
  67. if o == '-v':
  68. webchecker.verbose = webchecker.verbose + 1
  69. if o == '-t':
  70. extra_roots.append(a)
  71. if o == '-a':
  72. webchecker.nonames = not webchecker.nonames
  73. root = Tk(className='Webchecker')
  74. root.protocol("WM_DELETE_WINDOW", root.quit)
  75. c = CheckerWindow(root)
  76. c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
  77. nonames=webchecker.nonames)
  78. if args:
  79. for arg in args[:-1]:
  80. c.addroot(arg)
  81. c.suggestroot(args[-1])
  82. # Usually conditioned on whether external links
  83. # will be checked, but since that's not a command
  84. # line option, just toss them in.
  85. for url_root in extra_roots:
  86. # Make sure it's terminated by a slash,
  87. # so that addroot doesn't discard the last
  88. # directory component.
  89. if url_root[-1] != "/":
  90. url_root = url_root + "/"
  91. c.addroot(url_root, add_to_do = 0)
  92. root.mainloop()
  93. class CheckerWindow(webchecker.Checker):
  94. def __init__(self, parent, root=webchecker.DEFROOT):
  95. self.__parent = parent
  96. self.__topcontrols = Frame(parent)
  97. self.__topcontrols.pack(side=TOP, fill=X)
  98. self.__label = Label(self.__topcontrols, text="Root URL:")
  99. self.__label.pack(side=LEFT)
  100. self.__rootentry = Entry(self.__topcontrols, width=60)
  101. self.__rootentry.pack(side=LEFT)
  102. self.__rootentry.bind('<Return>', self.enterroot)
  103. self.__rootentry.focus_set()
  104. self.__controls = Frame(parent)
  105. self.__controls.pack(side=TOP, fill=X)
  106. self.__running = 0
  107. self.__start = Button(self.__controls, text="Run", command=self.start)
  108. self.__start.pack(side=LEFT)
  109. self.__stop = Button(self.__controls, text="Stop", command=self.stop,
  110. state=DISABLED)
  111. self.__stop.pack(side=LEFT)
  112. self.__step = Button(self.__controls, text="Check one",
  113. command=self.step)
  114. self.__step.pack(side=LEFT)
  115. self.__cv = BooleanVar(parent)
  116. self.__cv.set(self.checkext)
  117. self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
  118. command=self.update_checkext,
  119. text="Check nonlocal links",)
  120. self.__checkext.pack(side=LEFT)
  121. self.__reset = Button(self.__controls, text="Start over", command=self.reset)
  122. self.__reset.pack(side=LEFT)
  123. if __name__ == '__main__': # No Quit button under Grail!
  124. self.__quit = Button(self.__controls, text="Quit",
  125. command=self.__parent.quit)
  126. self.__quit.pack(side=RIGHT)
  127. self.__status = Label(parent, text="Status: initial", anchor=W)
  128. self.__status.pack(side=TOP, fill=X)
  129. self.__checking = Label(parent, text="Idle", anchor=W)
  130. self.__checking.pack(side=TOP, fill=X)
  131. self.__mp = mp = MultiPanel(parent)
  132. sys.stdout = self.__log = LogPanel(mp, "Log")
  133. self.__todo = ListPanel(mp, "To check", self, self.showinfo)
  134. self.__done = ListPanel(mp, "Checked", self, self.showinfo)
  135. self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
  136. self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo)
  137. self.__details = LogPanel(mp, "Details")
  138. self.root_seed = None
  139. webchecker.Checker.__init__(self)
  140. if root:
  141. root = str(root).strip()
  142. if root:
  143. self.suggestroot(root)
  144. self.newstatus()
  145. def reset(self):
  146. webchecker.Checker.reset(self)
  147. for p in self.__todo, self.__done, self.__bad, self.__errors:
  148. p.clear()
  149. if self.root_seed:
  150. self.suggestroot(self.root_seed)
  151. def suggestroot(self, root):
  152. self.__rootentry.delete(0, END)
  153. self.__rootentry.insert(END, root)
  154. self.__rootentry.select_range(0, END)
  155. self.root_seed = root
  156. def enterroot(self, event=None):
  157. root = self.__rootentry.get()
  158. root = root.strip()
  159. if root:
  160. self.__checking.config(text="Adding root "+root)
  161. self.__checking.update_idletasks()
  162. self.addroot(root)
  163. self.__checking.config(text="Idle")
  164. try:
  165. i = self.__todo.items.index(root)
  166. except (ValueError, IndexError):
  167. pass
  168. else:
  169. self.__todo.list.select_clear(0, END)
  170. self.__todo.list.select_set(i)
  171. self.__todo.list.yview(i)
  172. self.__rootentry.delete(0, END)
  173. def start(self):
  174. self.__start.config(state=DISABLED, relief=SUNKEN)
  175. self.__stop.config(state=NORMAL)
  176. self.__step.config(state=DISABLED)
  177. self.enterroot()
  178. self.__running = 1
  179. self.go()
  180. def stop(self):
  181. self.__stop.config(state=DISABLED, relief=SUNKEN)
  182. self.__running = 0
  183. def step(self):
  184. self.__start.config(state=DISABLED)
  185. self.__step.config(state=DISABLED, relief=SUNKEN)
  186. self.enterroot()
  187. self.__running = 0
  188. self.dosomething()
  189. def go(self):
  190. if self.__running:
  191. self.__parent.after_idle(self.dosomething)
  192. else:
  193. self.__checking.config(text="Idle")
  194. self.__start.config(state=NORMAL, relief=RAISED)
  195. self.__stop.config(state=DISABLED, relief=RAISED)
  196. self.__step.config(state=NORMAL, relief=RAISED)
  197. __busy = 0
  198. def dosomething(self):
  199. if self.__busy: return
  200. self.__busy = 1
  201. if self.todo:
  202. l = self.__todo.selectedindices()
  203. if l:
  204. i = l[0]
  205. else:
  206. i = 0
  207. self.__todo.list.select_set(i)
  208. self.__todo.list.yview(i)
  209. url = self.__todo.items[i]
  210. self.__checking.config(text="Checking "+self.format_url(url))
  211. self.__parent.update()
  212. self.dopage(url)
  213. else:
  214. self.stop()
  215. self.__busy = 0
  216. self.go()
  217. def showinfo(self, url):
  218. d = self.__details
  219. d.clear()
  220. d.put("URL: %s\n" % self.format_url(url))
  221. if self.bad.has_key(url):
  222. d.put("Error: %s\n" % str(self.bad[url]))
  223. if url in self.roots:
  224. d.put("Note: This is a root URL\n")
  225. if self.done.has_key(url):
  226. d.put("Status: checked\n")
  227. o = self.done[url]
  228. elif self.todo.has_key(url):
  229. d.put("Status: to check\n")
  230. o = self.todo[url]
  231. else:
  232. d.put("Status: unknown (!)\n")
  233. o = []
  234. if (not url[1]) and self.errors.has_key(url[0]):
  235. d.put("Bad links from this page:\n")
  236. for triple in self.errors[url[0]]:
  237. link, rawlink, msg = triple
  238. d.put(" HREF %s" % self.format_url(link))
  239. if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink)
  240. d.put("\n")
  241. d.put(" error %s\n" % str(msg))
  242. self.__mp.showpanel("Details")
  243. for source, rawlink in o:
  244. d.put("Origin: %s" % source)
  245. if rawlink != self.format_url(url):
  246. d.put(" (%s)" % rawlink)
  247. d.put("\n")
  248. d.text.yview("1.0")
  249. def setbad(self, url, msg):
  250. webchecker.Checker.setbad(self, url, msg)
  251. self.__bad.insert(url)
  252. self.newstatus()
  253. def setgood(self, url):
  254. webchecker.Checker.setgood(self, url)
  255. self.__bad.remove(url)
  256. self.newstatus()
  257. def newlink(self, url, origin):
  258. webchecker.Checker.newlink(self, url, origin)
  259. if self.done.has_key(url):
  260. self.__done.insert(url)
  261. elif self.todo.has_key(url):
  262. self.__todo.insert(url)
  263. self.newstatus()
  264. def markdone(self, url):
  265. webchecker.Checker.markdone(self, url)
  266. self.__done.insert(url)
  267. self.__todo.remove(url)
  268. self.newstatus()
  269. def seterror(self, url, triple):
  270. webchecker.Checker.seterror(self, url, triple)
  271. self.__errors.insert((url, ''))
  272. self.newstatus()
  273. def newstatus(self):
  274. self.__status.config(text="Status: "+self.status())
  275. self.__parent.update()
  276. def update_checkext(self):
  277. self.checkext = self.__cv.get()
  278. class ListPanel:
  279. def __init__(self, mp, name, checker, showinfo=None):
  280. self.mp = mp
  281. self.name = name
  282. self.showinfo = showinfo
  283. self.checker = checker
  284. self.panel = mp.addpanel(name)
  285. self.list, self.frame = tktools.make_list_box(
  286. self.panel, width=60, height=5)
  287. self.list.config(exportselection=0)
  288. if showinfo:
  289. self.list.bind('<Double-Button-1>', self.doubleclick)
  290. self.items = []
  291. def clear(self):
  292. self.items = []
  293. self.list.delete(0, END)
  294. self.mp.hidepanel(self.name)
  295. def doubleclick(self, event):
  296. l = self.selectedindices()
  297. if l:
  298. self.showinfo(self.items[l[0]])
  299. def selectedindices(self):
  300. l = self.list.curselection()
  301. if not l: return []
  302. return map(int, l)
  303. def insert(self, url):
  304. if url not in self.items:
  305. if not self.items:
  306. self.mp.showpanel(self.name)
  307. # (I tried sorting alphabetically, but the display is too jumpy)
  308. i = len(self.items)
  309. self.list.insert(i, self.checker.format_url(url))
  310. self.list.yview(i)
  311. self.items.insert(i, url)
  312. def remove(self, url):
  313. try:
  314. i = self.items.index(url)
  315. except (ValueError, IndexError):
  316. pass
  317. else:
  318. was_selected = i in self.selectedindices()
  319. self.list.delete(i)
  320. del self.items[i]
  321. if not self.items:
  322. self.mp.hidepanel(self.name)
  323. elif was_selected:
  324. if i >= len(self.items):
  325. i = len(self.items) - 1
  326. self.list.select_set(i)
  327. class LogPanel:
  328. def __init__(self, mp, name):
  329. self.mp = mp
  330. self.name = name
  331. self.panel = mp.addpanel(name)
  332. self.text, self.frame = tktools.make_text_box(self.panel, height=10)
  333. self.text.config(wrap=NONE)
  334. def clear(self):
  335. self.text.delete("1.0", END)
  336. self.text.yview("1.0")
  337. def put(self, s):
  338. self.text.insert(END, s)
  339. if '\n' in s:
  340. self.text.yview(END)
  341. def write(self, s):
  342. self.text.insert(END, s)
  343. if '\n' in s:
  344. self.text.yview(END)
  345. self.panel.update()
  346. class MultiPanel:
  347. def __init__(self, parent):
  348. self.parent = parent
  349. self.frame = Frame(self.parent)
  350. self.frame.pack(expand=1, fill=BOTH)
  351. self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
  352. self.topframe.pack(fill=X)
  353. self.botframe = Frame(self.frame)
  354. self.botframe.pack(expand=1, fill=BOTH)
  355. self.panelnames = []
  356. self.panels = {}
  357. def addpanel(self, name, on=0):
  358. v = StringVar(self.parent)
  359. if on:
  360. v.set(name)
  361. else:
  362. v.set("")
  363. check = Checkbutton(self.topframe, text=name,
  364. offvalue="", onvalue=name, variable=v,
  365. command=self.checkpanel)
  366. check.pack(side=LEFT)
  367. panel = Frame(self.botframe)
  368. label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
  369. label.pack(side=TOP, fill=X)
  370. t = v, check, panel
  371. self.panelnames.append(name)
  372. self.panels[name] = t
  373. if on:
  374. panel.pack(expand=1, fill=BOTH)
  375. return panel
  376. def showpanel(self, name):
  377. v, check, panel = self.panels[name]
  378. v.set(name)
  379. panel.pack(expand=1, fill=BOTH)
  380. def hidepanel(self, name):
  381. v, check, panel = self.panels[name]
  382. v.set("")
  383. panel.pack_forget()
  384. def checkpanel(self):
  385. for name in self.panelnames:
  386. v, check, panel = self.panels[name]
  387. panel.pack_forget()
  388. for name in self.panelnames:
  389. v, check, panel = self.panels[name]
  390. if v.get():
  391. panel.pack(expand=1, fill=BOTH)
  392. if __name__ == '__main__':
  393. main()