PageRenderTime 57ms CodeModel.GetById 7ms RepoModel.GetById 0ms app.codeStats 0ms

/jsb/plugs/common/wikipedia.py

https://code.google.com/p/jsonbot/
Python | 104 lines | 86 code | 9 blank | 9 comment | 5 complexity | 1153980d671d4fd2e859b419bdee085c MD5 | raw file
Possible License(s): JSON, MIT
  1. # jsb/plugs/common/wikipedia.py
  2. #
  3. #
""" query wikipedia. add "-<countrycode>" (for example -nl) to the search text to select a country specific wikipedia. """
  5. ## jsb imports
  6. from jsb.utils.url import geturl, striphtml
  7. from jsb.utils.generic import splittxt, handle_exception, fromenc
  8. from jsb.lib.commands import cmnds
  9. from jsb.lib.examples import examples
  10. from jsb.utils.rsslist import rsslist
  11. ## generic imports
  12. from urllib import quote
  13. import re
  14. import logging
  15. ## defines
  16. wikire = re.compile('start content(.*?)end content', re.M)
  17. ## searchwiki function
  18. def searchwiki(txt, lang='en'):
  19. """ parse wiki data. """
  20. input = []
  21. for i in txt.split():
  22. if i.startswith('-'):
  23. if len(i) != 3: continue
  24. else: lang = i[1:]
  25. continue
  26. input.append(i.strip().capitalize())
  27. what = "_".join(input)
  28. url = u'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(what.encode('utf-8')))
  29. url2 = u'http://%s.wikipedia.org/wiki/%s' % (lang, quote(what.encode('utf-8')))
  30. txt = getwikidata(url)
  31. if not txt: return ("", url2)
  32. if 'from other capitalisation' in txt:
  33. what = what.title()
  34. url = u'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(what.encode('utf-8')))
  35. url2 = u'http://%s.wikipedia.org/wiki/%s' % (lang, quote(what.encode('utf-8')))
  36. txt = getwikidata(url)
  37. if '#REDIRECT' in txt or '#redirect' in txt:
  38. redir = ' '.join(txt.split()[1:])
  39. url = u'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(redir.encode('utf-8')))
  40. url2 = u'http://%s.wikipedia.org/wiki/%s' % (lang, quote(redir.encode('utf-8')))
  41. txt = getwikidata(url)
  42. return (txt, url2)
  43. ## getwikidata function
  44. def getwikidata(url):
  45. """ fetch wiki data """
  46. try:
  47. result = geturl(url)
  48. except IOError, ex: logging.error("error fetching %s: %s" % (url, str(ex))) ; return
  49. if not result: return
  50. res = rsslist(result)
  51. txt = ""
  52. for i in res:
  53. try:
  54. logging.debug(unicode(i))
  55. txt = i['text']
  56. break
  57. except: pass
  58. txt = re.sub('\[\[(.*?)\]\]', '<b>\g<1></b>', txt)
  59. txt = re.sub('{{(.*?)}}', '<i>\g<1></i>', txt)
  60. txt = re.sub('==(.*?)==', '<h3>\g<1></h3>', txt)
  61. txt = re.sub('=(.*?)=', '<h2>\g<1></h2>', txt)
  62. txt = re.sub('\*(.*?)\n', '<li>\g<1></li>', txt)
  63. txt = re.sub('\n\n', '<br><br>', txt)
  64. txt = re.sub('\s+', ' ', txt)
  65. txt = txt.replace('|', ' - ')
  66. return txt
  67. ## wikipedia command
  68. resultre1 = re.compile("(<li>.*?</li>)")
  69. resultre2 = re.compile("(<h2>.*?</h2>)")
  70. def handle_wikipedia(bot, ievent):
  71. """ arguments: <searchtxt> ["-"<countrycode>] - search wikipedia, you can provide an optional country code. """
  72. if not ievent.rest: ievent.missing('<searchtxt>') ; return
  73. showall = False
  74. res = searchwiki(ievent.rest)
  75. if not res[0]: ievent.reply('no result found') ; return
  76. prefix = u'%s ===> ' % res[1]
  77. result = resultre1.findall(res[0])
  78. if result:
  79. if bot.type == "sxmpp" and not ievent.groupchat: showall = True
  80. ievent.reply(prefix, result, dot="<br>", showall=showall)
  81. return
  82. result2 = resultre2.findall(res[0])
  83. if result2:
  84. if bot.type == "sxmpp" and not ievent.groupchat: showall = True
  85. ievent.reply(prefix, result2, dot="<br>", showall=showall)
  86. return
  87. else: ievent.reply("no data found on %s" % event.rest)
  88. cmnds.add('wikipedia', handle_wikipedia, ['USER', 'GUEST'])
  89. examples.add('wikipedia', 'search wikipedia for <what>','1) wikipedia bot 2) wikipedia -nl bot')