/Demo/scripts/newslist.py

http://unladen-swallow.googlecode.com/ · Python · 366 lines · 203 code · 60 blank · 103 comment · 52 complexity · 93bc7112b65da955fba10052fab15ac9 MD5 · raw file

  1. #! /usr/bin/env python
  2. #######################################################################
  3. # Newslist $Revision: 66429 $
  4. #
  5. # Syntax:
  6. # newslist [ -a ]
  7. #
  8. # This is a program to create a directory full of HTML pages
  9. # which between them contain links to all the newsgroups available
  10. # on your server.
  11. #
  12. # The -a option causes a complete list of all groups to be read from
  13. # the server rather than just the ones which have appeared since last
  14. # execution. This recreates the local list from scratch. Use this on
  15. # the first invocation of the program, and from time to time thereafter.
  16. # When new groups are first created they may appear on your server as
  17. # empty groups. By default, empty groups are ignored by the -a option.
  18. # However, these new groups will not be created again, and so will not
  19. # appear in the server's list of 'new groups' at a later date. Hence they
  20. # won't appear until you do a '-a' after some articles have appeared.
  21. #
  22. # I should really keep a list of ignored empty groups and re-check them
  23. # for articles on every run, but I haven't got around to it yet.
  24. #
  25. # This assumes an NNTP news feed.
  26. #
  27. # Feel free to copy, distribute and modify this code for
  28. # non-commercial use. If you make any useful modifications, let me
  29. # know!
  30. #
  31. # (c) Quentin Stafford-Fraser 1994
  32. # fraser@europarc.xerox.com qs101@cl.cam.ac.uk
  33. # #
  34. #######################################################################
  35. import sys,nntplib, string, marshal, time, os, posix, string
  36. #######################################################################
  37. # Check these variables before running! #
  38. # Top directory.
  39. # Filenames which don't start with / are taken as being relative to this.
  40. topdir='/anfs/qsbigdisc/web/html/newspage'
  41. # The name of your NNTP host
  42. # eg.
  43. # newshost = 'nntp-serv.cl.cam.ac.uk'
  44. # or use following to get the name from the NNTPSERVER environment
  45. # variable:
  46. # newshost = posix.environ['NNTPSERVER']
  47. newshost = 'nntp-serv.cl.cam.ac.uk'
  48. # The filename for a local cache of the newsgroup list
  49. treefile = 'grouptree'
  50. # The filename for descriptions of newsgroups
  51. # I found a suitable one at ftp.uu.net in /uunet-info/newgroups.gz
  52. # You can set this to '' if you don't wish to use one.
  53. descfile = 'newsgroups'
  54. # The directory in which HTML pages should be created
  55. # eg.
  56. # pagedir = '/usr/local/lib/html/newspage'
  57. # pagedir = 'pages'
  58. pagedir = topdir
  59. # The html prefix which will refer to this directory
  60. # eg.
  61. # httppref = '/newspage/',
  62. # or leave blank for relative links between pages: (Recommended)
  63. # httppref = ''
  64. httppref = ''
  65. # The name of the 'root' news page in this directory.
  66. # A .html suffix will be added.
  67. rootpage = 'root'
  68. # Set skipempty to 0 if you wish to see links to empty groups as well.
  69. # Only affects the -a option.
  70. skipempty = 1
  71. # pagelinkicon can contain html to put an icon after links to
  72. # further pages. This helps to make important links stand out.
  73. # Set to '' if not wanted, or '...' is quite a good one.
  74. pagelinkicon='... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '
  75. # ---------------------------------------------------------------------
  76. # Less important personal preferences:
  77. # Sublistsize controls the maximum number of items the will appear as
  78. # an indented sub-list before the whole thing is moved onto a different
  79. # page. The smaller this is, the more pages you will have, but the
  80. # shorter each will be.
  81. sublistsize = 4
  82. # That should be all. #
  83. #######################################################################
  84. for dir in os.curdir, os.environ['HOME']:
  85. rcfile = os.path.join(dir, '.newslistrc.py')
  86. if os.path.exists(rcfile):
  87. print rcfile
  88. execfile(rcfile)
  89. break
  90. from nntplib import NNTP
  91. from stat import *
  92. rcsrev = '$Revision: 66429 $'
  93. rcsrev = string.join(filter(lambda s: '$' not in s, string.split(rcsrev)))
  94. desc = {}
  95. # Make (possibly) relative filenames into absolute ones
  96. treefile = os.path.join(topdir,treefile)
  97. descfile = os.path.join(topdir,descfile)
  98. page = os.path.join(topdir,pagedir)
  99. # First the bits for creating trees ---------------------------
  100. # Addtotree creates/augments a tree from a list of group names
  101. def addtotree(tree, groups):
  102. print 'Updating tree...'
  103. for i in groups:
  104. parts = string.splitfields(i,'.')
  105. makeleaf(tree, parts)
  106. # Makeleaf makes a leaf and the branch leading to it if necessary
  107. def makeleaf(tree,path):
  108. j = path[0]
  109. l = len(path)
  110. if not tree.has_key(j):
  111. tree[j] = {}
  112. if l == 1:
  113. tree[j]['.'] = '.'
  114. if l > 1:
  115. makeleaf(tree[j],path[1:])
  116. # Then the bits for outputting trees as pages ----------------
  117. # Createpage creates an HTML file named <root>.html containing links
  118. # to those groups beginning with <root>.
  119. def createpage(root, tree, p):
  120. filename = os.path.join(pagedir,root+'.html')
  121. if root == rootpage:
  122. detail = ''
  123. else:
  124. detail = ' under ' + root
  125. f = open(filename,'w')
  126. # f.write('Content-Type: text/html\n')
  127. f.write('<TITLE>Newsgroups available' + detail + '</TITLE>\n')
  128. f.write('<H1>Newsgroups available' + detail +'</H1>\n')
  129. f.write('<A HREF="'+httppref+rootpage+'.html">Back to top level</A><P>\n')
  130. printtree(f,tree,0,p)
  131. f.write('<I>This page automatically created by \'newslist\' v. '+rcsrev+'.')
  132. f.write(time.ctime(time.time()) + '</I><P>')
  133. f.close()
  134. # Printtree prints the groups as a bulleted list. Groups with
  135. # more than <sublistsize> subgroups will be put on a separate page.
  136. # Other sets of subgroups are just indented.
  137. def printtree(f, tree, indent, p):
  138. global desc
  139. l = len(tree)
  140. if l > sublistsize and indent>0:
  141. # Create a new page and a link to it
  142. f.write('<LI><B><A HREF="'+httppref+p[1:]+'.html">')
  143. f.write(p[1:]+'.*')
  144. f.write('</A></B>'+pagelinkicon+'\n')
  145. createpage(p[1:], tree, p)
  146. return
  147. kl = tree.keys()
  148. if l > 1:
  149. kl.sort()
  150. if indent > 0:
  151. # Create a sub-list
  152. f.write('<LI>'+p[1:]+'\n<UL>')
  153. else:
  154. # Create a main list
  155. f.write('<UL>')
  156. indent = indent + 1
  157. for i in kl:
  158. if i == '.':
  159. # Output a newsgroup
  160. f.write('<LI><A HREF="news:' + p[1:] + '">'+ p[1:] + '</A> ')
  161. if desc.has_key(p[1:]):
  162. f.write(' <I>'+desc[p[1:]]+'</I>\n')
  163. else:
  164. f.write('\n')
  165. else:
  166. # Output a hierarchy
  167. printtree(f,tree[i], indent, p+'.'+i)
  168. if l > 1:
  169. f.write('\n</UL>')
  170. # Reading descriptions file ---------------------------------------
  171. # This fills the global 'desc' dictionary mapping group name to its description
  172. def readdesc(descfile):
  173. global desc
  174. desc = {}
  175. if descfile == '':
  176. return
  177. try:
  178. d = open(descfile, 'r')
  179. print 'Reading descriptions...'
  180. except (IOError):
  181. print 'Failed to open description file ' + descfile
  182. return
  183. l = d.readline()
  184. while l != '':
  185. bits = string.split(l)
  186. try:
  187. grp = bits[0]
  188. dsc = string.join(bits[1:])
  189. if len(dsc)>1:
  190. desc[grp] = dsc
  191. except (IndexError):
  192. pass
  193. l = d.readline()
  194. # Check that output directory exists, ------------------------------
  195. # and offer to create it if not
  196. def checkopdir(pagedir):
  197. if not os.path.isdir(pagedir):
  198. print 'Directory '+pagedir+' does not exist.'
  199. print 'Shall I create it for you? (y/n)'
  200. if sys.stdin.readline()[0] == 'y':
  201. try:
  202. os.mkdir(pagedir,0777)
  203. except:
  204. print 'Sorry - failed!'
  205. sys.exit(1)
  206. else:
  207. print 'OK. Exiting.'
  208. sys.exit(1)
  209. # Read and write current local tree ----------------------------------
  210. def readlocallist(treefile):
  211. print 'Reading current local group list...'
  212. tree = {}
  213. try:
  214. treetime = time.localtime(os.stat(treefile)[ST_MTIME])
  215. except:
  216. print '\n*** Failed to open local group cache '+treefile
  217. print 'If this is the first time you have run newslist, then'
  218. print 'use the -a option to create it.'
  219. sys.exit(1)
  220. treedate = '%02d%02d%02d' % (treetime[0] % 100 ,treetime[1], treetime[2])
  221. try:
  222. dump = open(treefile,'r')
  223. tree = marshal.load(dump)
  224. dump.close()
  225. except (IOError):
  226. print 'Cannot open local group list ' + treefile
  227. return (tree, treedate)
  228. def writelocallist(treefile, tree):
  229. try:
  230. dump = open(treefile,'w')
  231. groups = marshal.dump(tree,dump)
  232. dump.close()
  233. print 'Saved list to '+treefile+'\n'
  234. except:
  235. print 'Sorry - failed to write to local group cache '+treefile
  236. print 'Does it (or its directory) have the correct permissions?'
  237. sys.exit(1)
  238. # Return list of all groups on server -----------------------------
  239. def getallgroups(server):
  240. print 'Getting list of all groups...'
  241. treedate='010101'
  242. info = server.list()[1]
  243. groups = []
  244. print 'Processing...'
  245. if skipempty:
  246. print '\nIgnoring following empty groups:'
  247. for i in info:
  248. grpname = string.split(i[0])[0]
  249. if skipempty and string.atoi(i[1]) < string.atoi(i[2]):
  250. print grpname+' ',
  251. else:
  252. groups.append(grpname)
  253. print '\n'
  254. if skipempty:
  255. print '(End of empty groups)'
  256. return groups
  257. # Return list of new groups on server -----------------------------
  258. def getnewgroups(server, treedate):
  259. print 'Getting list of new groups since start of '+treedate+'...',
  260. info = server.newgroups(treedate,'000001')[1]
  261. print 'got %d.' % len(info)
  262. print 'Processing...',
  263. groups = []
  264. for i in info:
  265. grpname = string.split(i)[0]
  266. groups.append(grpname)
  267. print 'Done'
  268. return groups
  269. # Now the main program --------------------------------------------
  270. def main():
  271. global desc
  272. tree={}
  273. # Check that the output directory exists
  274. checkopdir(pagedir)
  275. try:
  276. print 'Connecting to '+newshost+'...'
  277. if sys.version[0] == '0':
  278. s = NNTP.init(newshost)
  279. else:
  280. s = NNTP(newshost)
  281. connected = 1
  282. except (nntplib.error_temp, nntplib.error_perm), x:
  283. print 'Error connecting to host:', x
  284. print 'I\'ll try to use just the local list.'
  285. connected = 0
  286. # If -a is specified, read the full list of groups from server
  287. if connected and len(sys.argv) > 1 and sys.argv[1] == '-a':
  288. groups = getallgroups(s)
  289. # Otherwise just read the local file and then add
  290. # groups created since local file last modified.
  291. else:
  292. (tree, treedate) = readlocallist(treefile)
  293. if connected:
  294. groups = getnewgroups(s, treedate)
  295. if connected:
  296. addtotree(tree, groups)
  297. writelocallist(treefile,tree)
  298. # Read group descriptions
  299. readdesc(descfile)
  300. print 'Creating pages...'
  301. createpage(rootpage, tree, '')
  302. print 'Done'
  303. if __name__ == "__main__":
  304. main()
  305. # That's all folks
  306. ######################################################################