PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/wiktionary.py

https://github.com/matthazinski/phenny
Python | 114 lines | 95 code | 10 blank | 9 comment | 10 complexity | 3880dbb97bfb5851352aec8d3262f351 MD5 | raw file
  1. #!/usr/bin/env python
  2. """
  3. wiktionary.py - Phenny Wiktionary Module
  4. Copyright 2009, Sean B. Palmer, inamidst.com
  5. Licensed under the Eiffel Forum License 2.
  6. http://inamidst.com/phenny/
  7. """
  8. import re
  9. import web
  10. import json
  11. uri = 'http://en.wiktionary.org/w/index.php?title=%s&printable=yes'
  12. wikiapi = 'http://en.wiktionary.org/w/api.php?action=query&titles={0}&prop=revisions&rvprop=content&format=json'
  13. #r_tag = re.compile(r'<[^>]+>')
  14. r_ul = re.compile(r'(?ims)<ul>.*?</ul>')
  15. r_li = re.compile(r'^# ')
  16. r_img = re.compile(r'\[\[Image:.*\]\]')
  17. r_link1 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\]\]')
  18. r_link2 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\|(.+?)\]\]')
  19. r_context = re.compile(r'{{context\|(.+?)}}')
  20. r_template1 = re.compile(r'{{.+?\|(.+?)}}')
  21. r_template2 = re.compile(r'{{(.+?)}}')
  22. def text(html):
  23. text = r_li.sub('', html).strip()
  24. text = r_img.sub('', text)
  25. text = r_link1.sub(r'\1', text)
  26. text = r_link2.sub(r'\2', text)
  27. text = r_context.sub(r'\1:', text)
  28. text = r_template1.sub(r'\1:', text)
  29. text = r_template2.sub(r'\1:', text)
  30. return text
  31. def wiktionary(word):
  32. bytes = web.get(wikiapi.format(web.quote(word)))
  33. pages = json.loads(bytes)
  34. pages = pages['query']['pages']
  35. pg = next(iter(pages))
  36. try:
  37. result = pages[pg]['revisions'][0]['*']
  38. except KeyError:
  39. return '', ''
  40. mode = None
  41. etymology = None
  42. definitions = {}
  43. for line in result.splitlines():
  44. if line == '===Etymology===':
  45. mode = 'etymology'
  46. elif 'Noun' in line:
  47. mode = 'noun'
  48. elif 'Verb' in line:
  49. mode = 'verb'
  50. elif 'Adjective' in line:
  51. mode = 'adjective'
  52. elif 'Adverb' in line:
  53. mode = 'adverb'
  54. elif 'Interjection' in line:
  55. mode = 'interjection'
  56. elif 'Particle' in line:
  57. mode = 'particle'
  58. elif 'Preposition' in line:
  59. mode = 'preposition'
  60. elif len(line) == 0:
  61. mode = None
  62. elif mode == 'etymology':
  63. etymology = text(line)
  64. elif mode is not None and '#' in line:
  65. definitions.setdefault(mode, []).append(text(line))
  66. if '====Synonyms====' in line:
  67. break
  68. return etymology, definitions
  69. parts = ('preposition', 'particle', 'noun', 'verb',
  70. 'adjective', 'adverb', 'interjection')
  71. def format(word, definitions, number=2):
  72. result = '%s' % word
  73. for part in parts:
  74. if part in definitions:
  75. defs = definitions[part][:number]
  76. result += ' \u2014 ' + ('%s: ' % part)
  77. n = ['%s. %s' % (i + 1, e.strip(' .')) for i, e in enumerate(defs)]
  78. result += ', '.join(n)
  79. return result.strip(' .,')
  80. def w(phenny, input):
  81. if not input.group(2):
  82. return phenny.reply("Nothing to define.")
  83. word = input.group(2)
  84. etymology, definitions = wiktionary(word)
  85. if not definitions:
  86. phenny.say("Couldn't get any definitions for %s." % word)
  87. return
  88. result = format(word, definitions)
  89. if len(result) < 150:
  90. result = format(word, definitions, 3)
  91. if len(result) < 150:
  92. result = format(word, definitions, 5)
  93. if len(result) > 300:
  94. result = result[:295] + '[...]'
  95. phenny.say(result)
  96. w.commands = ['w']
  97. w.example = '.w bailiwick'
  98. if __name__ == '__main__':
  99. print(__doc__.strip())