PageRenderTime 50ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/fathead/UNCLEAN/parse_mysqlfunction.py

http://github.com/duckduckgo/zeroclickinfo-fathead
Python | 49 lines | 48 code | 1 blank | 0 comment | 0 complexity | ee9fb4d10813b6cf1dae88bcf9b794ec MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. import string
  6. openclosediv = re.compile('''<div class="itemizedlist">.*?</div>|<div class="orderedlist">.*?</div>''',re.DOTALL)
  7. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  8. spaces = re.compile('''\s+''',re.DOTALL)
  9. files = []
  10. for file in os.listdir('./docs/mysql/functions/'):
  11. if 'functions' in file and '.html' in file:
  12. files.append('./docs/mysql/functions/%s'%(file))
  13. for file in files:
  14. filecontents = ''
  15. for line in open(file):
  16. line = ''.join(filter(lambda x:x in string.printable, line))
  17. filecontents = "%s %s"%(filecontents,line.strip())
  18. soup = BeautifulSoup(filecontents)
  19. t = soup.findAll(attrs={"class" : "itemizedlist"})[0]
  20. t = openclosediv.sub('',str(t.contents[0]))
  21. t = BeautifulSoup(t)
  22. for li in t.findAll('li'):
  23. name = openclosetags.sub('',str(li.findAll('p')[0])).strip()
  24. desc = openclosetags.sub('',str(li.findAll('p')[1])).strip()
  25. synopsis = ''
  26. for a in li.findAll('a'):
  27. try:
  28. url = a['href']
  29. break
  30. except:
  31. pass
  32. pre = li.findAll('pre')
  33. if len(pre) != 0:
  34. synopsis = openclosetags.sub('',str(pre[0])).replace('mysql&gt;',"\r\nmysql>").replace('','').strip()
  35. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  36. print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"%(name,'',url,desc,synopsis.replace("\r\n","__NEWLINE__"),'','mysqlfunction','en')
  37. if sys.argv[1].lower() == 'sql':
  38. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name.replace("'","\\'").replace("\\\\","\\"),'',url,desc.replace("'","\\'").replace("\\\\","\\"),synopsis.replace("'","\\'").replace("\\\\","\\"),'','mysqlfunction','en')