PageRenderTime 29ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/fathead/UNCLEAN/parse_apache2option.py

http://github.com/duckduckgo/zeroclickinfo-fathead
Python | 39 lines | 38 code | 1 blank | 0 comment | 0 complexity | c069481114f0b6ee0cd38ec40132166c MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. import string
  6. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  7. spaces = re.compile('''\s+''',re.DOTALL)
  8. files = []
  9. for file in os.listdir('./docs/apache2/'):
  10. if '.html' in file:
  11. files.append('./docs/apache2/%s'%(file))
  12. for file in files:
  13. filecontents = ''
  14. for line in open(file):
  15. line = ''.join(filter(lambda x:x in string.printable, line))
  16. filecontents = "%s %s"%(filecontents,line.strip())
  17. soup = BeautifulSoup(filecontents)
  18. for dir in soup.findAll(attrs={"class":"directive-section"}):
  19. name = openclosetags.sub('',str(dir.findAll('h2')[0])).strip()
  20. desc = ''
  21. p = dir.findAll('p')
  22. if len(p) == 0:
  23. desc = openclosetags.sub('',str(dir.findAll(attrs={"class":"note"})[0]))
  24. else:
  25. desc = openclosetags.sub('',str(p[0]))
  26. synopsis = openclosetags.sub('',str(dir.findAll('tr')[1].findAll('td')[0]))
  27. url = "http://httpd.apache.org/docs/2.2/mod/%s#%s"%(file.replace('./docs/apache2/',''),dir.findAll('a')[0]['id'])
  28. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  29. print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"%(name,'',url,desc,synopsis,'','apache2directive','en')
  30. if sys.argv[1].lower() == 'sql':
  31. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name,'',url,desc.replace("'","\\'"),synopsis.replace("'","\\'"),'apache apache2 directive apache2.2','apache2directive','en')