PageRenderTime 69ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/madcow/modules/factoids.py

http://madcow.googlecode.com/
Python | 469 lines | 469 code | 0 blank | 0 comment | 1 complexity | 9a5f9d82f8a6595b3aeb810db9f88a3f MD5 | raw file
Possible License(s): GPL-3.0
  1. """Emulate Perl InfoBot's factoid feature"""
  2. from madcow.util import Module
  3. import re
  4. from re import I
  5. import os
  6. import random
  7. from madcow.util import encoding
  8. from madcow.util.text import *
  9. try:
  10. import dbm
  11. except ImportError:
  12. import anydbm as dbm
  13. class Factoids(object):
  14. """
  15. This is a straight port of infobot.pl factoid handling.
  16. yes, this code is totally ridiculous, but it works pretty well. :P
  17. """
  18. # precompiled regex for do_question
  19. _qwords = u'what|where|who'
  20. _normalizations = (
  21. (r'^\S+\s*[:-]+\s*', u''),
  22. (r'^hey\s*[-,.: ]+\s*', u''),
  23. (r'whois', u'who is'),
  24. (r'where can i find', u'where is'),
  25. (r'\bhow about\b', u'where is'),
  26. (r'\bda\b', u'the'),
  27. (r'^([gj]ee+z*|boy|golly|gosh)\s*[-,. ]+\s*', u''),
  28. (r'^(well|and|but|or|yes)\s*[-,. ]+\s*', u''),
  29. (r'^(does\s+)?(any|ne)\s*(1|one|body)\s+know\s+', u''),
  30. (r'^[uh]+m*\s*[-,. ]+\s*', u''),
  31. (r'^o+[hk]+(a+y+)?\s*[-,. ]+\s*', u''),
  32. (r'^w(ow|hee+|o+ho+)+\s*[,. ]+\s*', u''),
  33. (r'^(still|well)\s*,\s*', u''),
  34. (r'^(stupid\s+)?question\s*[:-]+\s*', u''),
  35. (r'(?:^| )(%s)\s+(.*)\s+(is|are)(?: |$)' % _qwords, r' \1 \3 \2 '),
  36. (r'(?:^| )(%s)\s+(\S+)\s+(is|are)(?: |$)' % _qwords, r' \1 \3 \2 '),
  37. (r'be tellin\'?g?', r'tell'),
  38. (r" '?bout", r' about'),
  39. (r',? any(hoo?w?|ways?)', r' '),
  40. (r',?\s*(pretty )*please\??\s*$', r'?'),
  41. (r'th(e|at|is) (((m(o|u)th(a|er) ?)?fuck(in\'?g?)?|hell|heck|(god-?)?'
  42. r'damn?(ed)?) ?)+', r''),
  43. (r'\bw+t+f+\b', r'where'),
  44. (r'this (.*) thingy?', r' \1'),
  45. (r'this thingy? (called )?', r''),
  46. (r'ha(s|ve) (an?y?|some|ne) (idea|clue|guess|seen) ', r'know '),
  47. (r'does (any|ne|some) ?(1|one|body) know ', r''),
  48. (r'do you know ', r''),
  49. (r'can (you|u|((any|ne|some) ?(1|one|body)))( please)? tell (me|us|hi'
  50. r'm|her)', r''),
  51. (r'where (\S+) can \S+ (a|an|the)?', r''),
  52. (r'(can|do) (i|you|one|we|he|she) (find|get)( this)?', r'is'),
  53. (r'(i|one|we|he|she) can (find|get)', r'is'),
  54. (r'(the )?(address|url) (for|to) ', r''),
  55. (r'(where is )+', r'where is '),
  56. (r"(?:^| )(%s)'?s(?: |$)" % _qwords, r' \1 is '),
  57. )
  58. _normalizations = [(re.compile(x, I), y) for x, y in _normalizations]
  59. _tell = r'^tell\s+(\S+)\s+'
  60. _tell1 = re.compile(_tell + r'about[: ]+(.+)', I)
  61. _tell2 = re.compile(_tell + r'where\s+(?:\S+)\s+can\s+(?:\S+)\s+(.+)', I)
  62. _tell3 = re.compile(_tell + r'(%s)\s+(.*?)\s+(is|are)[.?!]*$' % _qwords, I)
  63. _qmark = re.compile(r'\s*[?!]*\?[?!1]*\s*$')
  64. _endpunc = re.compile(r'\s*[.?!]+\s*$')
  65. _normalize_names = [
  66. (r'(^|\W)WHOs\s+', r"\1NICK's ", False),
  67. (r'(^|\W)WHOs$', r"\1NICK's", False),
  68. (r"(^|\W)WHO'(\s|$)", r"\1NICK's\2", False),
  69. (r"(^|\s)i'm(\W|$)", r'\1NICK is\2', False),
  70. (r"(^|\s)i've(\W|$)", r'\1NICK has\2', False),
  71. (r'(^|\s)i have(\W|$)', r'\1NICK has\2', False),
  72. (r"(^|\s)i haven'?t(\W|$)", r'\1NICK has not\2', False),
  73. (r'(^|\s)i(\W|$)', r'\1NICK\2', False),
  74. (r' am\b', r' is', False),
  75. (r'\bam ', r'is', False),
  76. (r'yourself', r'BOTNICK', True),
  77. (r'(^|\s)(me|myself)(\W|$)', r'\1NICK\3', False),
  78. (r'(^|\s)my(\W|$)', r'\1NICK\'s\2', False),
  79. (r"(^|\W)you'?re(\W|$)", r'\1you are\2', False),
  80. (r'(^|\W)are you(\W|$)', r'\1is BOTNICK\2', True),
  81. (r'(^|\W)you are(\W|$)', r'\1BOTNICK is\2', True),
  82. (r'(^|\W)you(\W|$)', r'\1BOTNICK\2', True),
  83. (r'(^|\W)your(\W|$)', r"\1BOTNICK's\2", True),
  84. ]
  85. _whereat = re.compile(r'\s+at$', I)
  86. _qword = re.compile(r'^(?:(%s)\s+)?(.+)$' % _qwords)
  87. _literal = re.compile(r'^\s*literal\s+', I)
  88. _verbs = (u'is', u'are')
  89. _get_verb = re.compile(r'^.*?(is|are)\s+(?:(an?|the)\s+)?(.+)\s*$')
  90. _results = re.compile(r'\s*\|\s*')
  91. _isreply = re.compile(r'^\s*<reply>\s*', I)
  92. _reply_formats = (
  93. u'KEY is RESULT',
  94. u'i think KEY is RESULT',
  95. u'hmmm... KEY is RESULT',
  96. u'it has been said that KEY is RESULT',
  97. u'KEY is probably RESULT',
  98. u'rumour has it KEY is RESULT',
  99. u'i heard KEY was RESULT',
  100. u'somebody said KEY was RESULT',
  101. u'i guess KEY is RESULT',
  102. u'well, KEY is RESULT',
  103. u'KEY is, like, RESULT',
  104. )
  105. _unknown = (
  106. u"i don't know",
  107. u"i wish i knew",
  108. u"i haven't a clue",
  109. u"no idea",
  110. u"bugger all, i dunno",
  111. )
  112. _unknown_format = (
  113. u'NICK: RESULT',
  114. u'RESULT, NICK',
  115. )
  116. # precompiled regex for do_statement
  117. _normalize_statements = [
  118. (r'\bi am\b', u'NICK is', False),
  119. (r'\bmy\b', u"NICK's", False),
  120. (r'\byour\b', u"BOTNICK's", False),
  121. (r'\byou are\b', u'BOTNICK is', True),
  122. (r'^no\s*,\s*', u'', False),
  123. (r'^i\s+(heard|think)\s+', u'', False),
  124. (r'^some(one|1|body)\s+said\s+', u'', False),
  125. (r'\s+', u' ', False),
  126. ]
  127. _get_st_verb = re.compile(r'^(.*?)\b(is|are)\b(.*?)$', I)
  128. _article = re.compile(r'^(the|da|an?)\s+')
  129. _maxkey = 50
  130. _maxval = 325
  131. _st_qwords = u'who what where when why how'.split()
  132. _st_fails = [
  133. re.compile(r'^(who|what|when|where|why|how|it) '),
  134. re.compile(r'^(this|that|these|those|they|you) '),
  135. re.compile(r'^(every(one|body)|we) '),
  136. re.compile(r'^\s*\*'),
  137. re.compile(r'^\s*<+[-=]+'),
  138. re.compile(r'^[\[<\(]\w+[\]>\)]'),
  139. re.compile(r'^heya?,? '),
  140. re.compile(r'^\s*th(is|at|ere|ese|ose|ey)'),
  141. re.compile(r'^\s*it\'?s?\W'),
  142. re.compile(r'^\s*if '),
  143. re.compile(r'^\s*how\W'),
  144. re.compile(r'^\s*why\W'),
  145. re.compile(r'^\s*h(is|er) '),
  146. re.compile(r'^\s*\D[\d\w]*\.{2,}'),
  147. re.compile(r'^\s*so is'),
  148. re.compile(r'^\s*s+o+r+[ye]+\b'),
  149. re.compile(r'^\s*supposedly'),
  150. re.compile(r'^all '),
  151. ]
  152. _also_or = re.compile(r'\s*\|\s*')
  153. _also = re.compile(r'^also\s+')
  154. _forget = re.compile(r'^forget\s+((an?|the)\s+)?', I)
  155. _replace = re.compile(r'^\s*(.+?)\s*=~\s*s/(.+?)/(.*?)/\s*$')
  156. def __init__(self, parent):
  157. self.parent = parent
  158. # DBM functions
  159. def get_dbm(self, dbname):
  160. dbfile = os.path.join(self.parent.madcow.base, 'db', dbname.lower())
  161. return dbm.open(dbfile, u'c', 0640)
  162. def get(self, dbname, key):
  163. dbm = self.get_dbm(dbname)
  164. try:
  165. key = encode(key.lower())
  166. val = dbm.get(key)
  167. if isinstance(val, str):
  168. val = decode(val)
  169. return val
  170. finally:
  171. dbm.close()
  172. def set(self, dbname, key, val):
  173. dbm = self.get_dbm(dbname)
  174. try:
  175. key = encode(key.lower())
  176. val = encode(val)
  177. dbm[key] = val
  178. finally:
  179. dbm.close()
  180. def unset(self, dbname, key):
  181. dbm = self.get_dbm(dbname)
  182. try:
  183. key = encode(key.lower())
  184. if dbm.has_key(key):
  185. del dbm[key]
  186. return True
  187. return False
  188. finally:
  189. dbm.close()
  190. def parse(self, message, nick, req):
  191. for func in (self.do_replace, self.do_forget, self.do_question,
  192. self.do_statement):
  193. result = func(message, nick, req)
  194. if result:
  195. return result
  196. def do_question(self, message, nick, req):
  197. addressed = req.addressed
  198. # message normalizations
  199. message = message.strip()
  200. for norm, replacement in self._normalizations:
  201. message = norm.sub(replacement, message)
  202. # parse syntax for instructing bot to speak to someone else
  203. try:
  204. target, tell_obj = self._tell1.search(message).groups()
  205. except:
  206. try:
  207. target, tell_obj = self._tell2.search(message).groups()
  208. except:
  209. try:
  210. target, q, tell_obj, verb = \
  211. self._tell3.search(message).groups()
  212. tell_obj = u' '.join([q, verb, tell_obj])
  213. except:
  214. target = tell_obj = None
  215. if tell_obj:
  216. message = self._endpunc.sub(u'', tell_obj)
  217. if not target or target.lower() == u'me':
  218. target = nick
  219. elif target.lower() == u'us':
  220. target = None
  221. message, final_qmark = self._qmark.subn(u'', message)
  222. message = self._endpunc.sub(u'', message)
  223. # switchPerson from infobot.pl
  224. if target:
  225. who = target
  226. else:
  227. who = nick
  228. who = re.escape(who).lower()[:9].split()[0]
  229. botnick = self.parent.madcow.botname()
  230. # callback to interpolate the dynamic regexes
  231. interpolate = lambda x: x.replace(u'WHO', who).replace(
  232. u'BOTNICK', botnick).replace(u'NICK', nick)
  233. for norm, replacement, need_addressing in self._normalize_names:
  234. if need_addressing and not addressed:
  235. continue
  236. norm = interpolate(norm)
  237. replacement = interpolate(replacement)
  238. message = re.sub(norm, replacement, message)
  239. # this has to come after the punctuation check, i guess..
  240. message = self._whereat.sub(u'', message)
  241. # get qword
  242. message = message.strip()
  243. try:
  244. qword, message = self._qword.search(message).groups()
  245. except:
  246. qword = None
  247. if not qword and final_qmark and addressed:
  248. qword = u'where'
  249. # literal request?
  250. message, literal = self._literal.subn(u'', message)
  251. # if no verb specified, try both dbs for direct match?
  252. result = None
  253. for dbname in self._verbs:
  254. result = self.get(dbname, message)
  255. if result:
  256. verb = dbname
  257. key = message
  258. break
  259. # that didnu't work, let's try this..
  260. if not result and qword:
  261. try:
  262. verb, keymod, key = self._get_verb.search(message).groups()
  263. result = self.get(verb, key)
  264. if keymod:
  265. key = u'%s %s' % (keymod, key)
  266. except:
  267. pass
  268. # output final result
  269. if result:
  270. if literal:
  271. return u'%s: %s =%s= %s' % (nick, key, verb, result)
  272. result = random.choice(self._results.split(result))
  273. result, short = self._isreply.subn(u'', result)
  274. if not short:
  275. if verb == u'is':
  276. format = random.choice(self._reply_formats)
  277. format = format.replace(u'KEY', key)
  278. format = format.replace(u'RESULT', result)
  279. result = format
  280. else:
  281. result = u'%s %s %s' % (key, verb, result)
  282. result = result.replace(u'$who', nick)
  283. result = result.strip()
  284. # so.. should we really send it or not?
  285. if not final_qmark and not addressed and not tell_obj:
  286. result = None
  287. # did we actually figure this out? if not, say so only if explicit
  288. if qword or final_qmark:
  289. if addressed and not result:
  290. result = random.choice(self._unknown)
  291. format = random.choice(self._unknown_format)
  292. format = format.replace(u'RESULT', result)
  293. format = format.replace(u'NICK', nick)
  294. result = format
  295. # modify output parameters for tells
  296. if result and tell_obj:
  297. result = u'%s wants you to know: %s' % (nick, result)
  298. req.sendto = target
  299. return result
  300. def do_statement(self, message, nick, req):
  301. botnick = self.parent.madcow.botname()
  302. addressed = req.addressed
  303. private = req.private
  304. correction = req.correction
  305. # normalize
  306. message = message.strip()
  307. for norm, replacement, needs_addressing in self._normalize_statements:
  308. if needs_addressing and not addressed:
  309. continue
  310. replacement = replacement.replace(u'BOTNICK', botnick)
  311. replacement = replacement.replace(u'NICK', nick)
  312. message = re.compile(norm, I).sub(replacement, message)
  313. # does this look like a statement?
  314. try:
  315. key, verb, val = self._get_st_verb.search(message).groups()
  316. except:
  317. return
  318. # clean it up
  319. key = key.strip().lower()
  320. key = self._article.sub(u'', key)
  321. key = key[:self._maxkey]
  322. val = val.strip()
  323. val = val[:self._maxval]
  324. # stuff to ignore to prevent storing dumb stuff
  325. if key in self._st_qwords:
  326. return
  327. if not addressed:
  328. try:
  329. for regex in self._st_fails:
  330. if regex.search(key):
  331. raise Exception
  332. except:
  333. return
  334. if not len(key):
  335. return
  336. # update db
  337. val, also = self._also.subn(u'', val)
  338. val, also_or = self._also_or.subn(u'', val)
  339. exists = self.get(verb, key)
  340. if exists == val:
  341. if addressed:
  342. return u'I already had it that way, %s' % nick
  343. else:
  344. return
  345. if exists:
  346. if also:
  347. if also_or:
  348. val = exists + u'|' + val
  349. else:
  350. val = exists + u' or ' + val
  351. elif not correction:
  352. if addressed:
  353. return u'%s: but %s %s %s' % (nick, key, verb, exists)
  354. else:
  355. return
  356. val = val[:self._maxval]
  357. self.set(verb, key, val)
  358. if addressed:
  359. return u'OK, %s' % nick
  360. def do_forget(self, message, nick, req):
  361. try:
  362. key, forget = self._forget.subn(u'', message)
  363. except:
  364. forget = 0
  365. if not forget:
  366. return
  367. key = self._endpunc.sub(u'', key).strip()
  368. # message normalizations
  369. for norm, replacement in self._normalizations:
  370. key = norm.sub(replacement, key)
  371. # remove
  372. found = False
  373. for dbname in self._verbs:
  374. if self.get(dbname, key):
  375. self.unset(dbname, key)
  376. found = True
  377. # respond
  378. if found:
  379. return u'%s: I forgot %s' % (nick, key)
  380. else:
  381. return u"%s, I didn't find anything matching %s" % (nick, key)
  382. def do_replace(self, message, nick, req):
  383. try:
  384. key, orig, new = self._replace.search(message).groups()
  385. except:
  386. return
  387. found = None
  388. for dbname in self._verbs:
  389. val = self.get(dbname, key)
  390. if val:
  391. found = val
  392. break
  393. if not found:
  394. return u'%s: no entry in db for %s' % (nick, repr(key))
  395. if orig not in val:
  396. return u'%s: entry found, but %s is not in it' % (nick, repr(orig))
  397. val = val.replace(orig, new)
  398. self.set(dbname, key, val)
  399. return u'OK, %s' % nick
  400. class Main(Module):
  401. pattern = Module._any
  402. require_addressing = False
  403. priority = 99
  404. allow_threading = False
  405. terminate = False
  406. help = u'\n'.join([
  407. 'this module picks up factoids using natural language parsing. try stuff like:',
  408. 'foo is bar.',
  409. 'foo is also baz.',
  410. 'what is foo?',
  411. 'forget foo',
  412. ])
  413. def init(self):
  414. self.factoids = Factoids(parent=self)
  415. def response(self, nick, args, kwargs):
  416. result = self.factoids.parse(args[0], nick, kwargs[u'req'])
  417. return encoding.convert(result)