PageRenderTime 46ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/fathead/UNCLEAN/parse_perlvars.py

http://github.com/duckduckgo/zeroclickinfo-fathead
Python | 52 lines | 51 code | 1 blank | 0 comment | 3 complexity | c0ebe9ac4c4fbddf76b7dc522525171e MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  6. files = []
  7. for file in os.listdir('./docs/perl/perlvar/'):
  8. if '.html' in file:
  9. files.append('./docs/perl/perlvar/%s'%(file))
  10. for file in files:
  11. filecontents = ''
  12. for line in open(file):
  13. filecontents = "%s %s"%(filecontents,line.strip())
  14. filecontents = filecontents.replace("'","")
  15. soup = BeautifulSoup(filecontents)
  16. for ul in soup.findAll("ul"):
  17. prevnames = []
  18. for li in ul.findAll('li',recursive=False):
  19. b = li.findAll('b')
  20. p = li.findAll('p')
  21. pre = li.findAll('pre')
  22. name = openclosetags.sub('',str(b[0]))
  23. synopsis = ""
  24. if len(p) == 0:
  25. prevnames.append(name)
  26. else:
  27. desc = openclosetags.sub('',str(p[0]))
  28. if len(pre) != 0:
  29. for l in pre[0].findAll('li'):
  30. synopsis = "%s\r\nb%s"%(synopsis,openclosetags.sub('',str(l)).strip())
  31. synopsis = synopsis.strip()
  32. synopsis = synopsis.replace("\r\n","\n")
  33. url = "http://perldoc.perl.org/perlvar.html#%s"%(li.findAll('a')[0]['name'])
  34. url = url.replace("\\","\\\\")
  35. prevnames.append(name)
  36. for name in prevnames:
  37. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  38. print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"%(name,'',url,desc,synopsis.replace("\r\n","__NEWLINE__"),'','perl5var','en')
  39. if sys.argv[1].lower() == 'sql':
  40. name = ' '.join(prevnames)
  41. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name,'',url,desc,synopsis,'','perl5var','en')
  42. prevnames = []