PageRenderTime 35ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/fathead/UNCLEAN/parse_cplusplus.py

http://github.com/duckduckgo/zeroclickinfo-fathead
Python | 38 lines | 35 code | 3 blank | 0 comment | 0 complexity | 4c2a9a7272fcb6dbca3aea979ed77493 MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. import MySQLdb
  6. conn = MySQLdb.connect(user='root')
  7. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  8. files = []
  9. for file in os.listdir('./docs/cplusplus/'):
  10. files.append('./docs/cplusplus/%s'%(file))
  11. #http://www.cplusplus.com/reference/
  12. for file in files:
  13. filecontents = open(file).read()
  14. soup = BeautifulSoup(filecontents)
  15. for node in soup.findAll("div",{"class":re.compile(r'\btype-post\b')}):
  16. name = openclosetags.sub('',str(node.findAll("div","post-title")[0]))
  17. desc = openclosetags.sub('',str(node.findAll("div","p-con")[0].findAll('p')[0]))
  18. s = node.findAll("div","wp_syntax")[0].findAll('pre')
  19. synopsis = ''
  20. if len(s) == 1:
  21. synopsis = openclosetags.sub('',str(s[0]))
  22. else:
  23. synopsis = openclosetags.sub('',str(s[1]))
  24. url = node.findAll('a')[0]['href']
  25. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  26. print "%s\t%s\t%s\t%s\t%s"%(name,url,desc,synopsis,desc)
  27. if sys.argv[1].lower() == 'sql':
  28. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name,'',url,conn.escape_string(desc),conn.escape_string(synopsis),'','stuntsnippets','')