PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/fathead/UNCLEAN/parse_sqlserverfunction.py

http://github.com/duckduckgo/zeroclickinfo-fathead
Python | 34 lines | 33 code | 1 blank | 0 comment | 0 complexity | 8dd46c8219d3985d829d88c2984d0550 MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. import string
  6. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  7. spaces = re.compile('''\s+''',re.DOTALL)
  8. files = []
  9. for file in os.listdir('./docs/sqlserver/functions/'):
  10. if '.html' in file:
  11. files.append('./docs/sqlserver/functions/%s'%(file))
  12. for file in files:
  13. filecontents = ''
  14. for line in open(file):
  15. line = ''.join(filter(lambda x:x in string.printable, line))
  16. filecontents = "%s %s"%(filecontents,line.strip())
  17. soup = BeautifulSoup(filecontents)
  18. name = soup.findAll('h1')[0].string.replace('(Transact-SQL)','')
  19. desc = openclosetags.sub('',str(soup.findAll(attrs={"class" : "introduction"})[0].findAll('p')[0]))
  20. synopsis = soup.findAll(attrs={"class":"LW_CodeSnippetContainerCodeCollection"})[0].findAll('pre')[0].string.strip()
  21. url = "http://msdn.microsoft.com/en-us/library/%s"%(file.replace('./docs/sqlserver/functions/library','').replace('.html',''))
  22. url = url.replace('./docs/sqlserver/functions/','')
  23. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  24. print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"%(name,'',url,desc,synopsis,'','sqlserverfunction','en')
  25. if sys.argv[1].lower() == 'sql':
  26. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name,'',url,desc.replace("'","\\'"),synopsis.replace("'","\\'"),'sql server sqlserver sqlserver2008 2008','sqlserverfunction','en')