PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/share/UNCLEAN/parse_nginxcore.py

https://github.com/elebow/zeroclickinfo-fathead
Python | 55 lines | 54 code | 1 blank | 0 comment | 1 complexity | 18eae1e7ef157b8b9052e6afb81fc4ae MD5 | raw file
Possible License(s): Apache-2.0
  1. from BeautifulSoup import BeautifulSoup
  2. import re
  3. import os
  4. import sys
  5. import string
  6. openclosetags = re.compile('''<.*?>|</.*?>''',re.DOTALL)
  7. spaces = re.compile('''\s+''',re.DOTALL)
  8. files = []
  9. files.append('./docs/NginxHttpCoreModule.htm')
  10. #files.append('./NginxHttpCoreModule.htm')
  11. def geth2locations(filecontents):
  12. ret = []
  13. count = 0
  14. for x in filecontents:
  15. count += 1
  16. if 'h2' in x:
  17. ret.append(count)
  18. ret.append(count)
  19. return ret
  20. for file in files:
  21. filecontents = []
  22. for line in open(file):
  23. line = ''.join(filter(lambda x:x in string.printable, line))
  24. filecontents.append(line.strip())
  25. contents = geth2locations(filecontents)
  26. for x in range(len(contents)-1):
  27. name = ''
  28. synopsis = ''
  29. description = ''
  30. soup = BeautifulSoup(''.join(filecontents[contents[x]-1:contents[x+1]-1]))
  31. h2 = soup.findAll('h2')
  32. if len(h2) != 0:
  33. name = openclosetags.sub('',str(h2[0])).strip()
  34. ps = soup.findAll('p')
  35. if '$' in name:
  36. description = openclosetags.sub('',str(ps[0]))
  37. else:
  38. for p in ps:
  39. if '<b>' in str(p):
  40. synopsis = '%s<br>%s'%(synopsis,openclosetags.sub('',str(p)))
  41. else:
  42. description = openclosetags.sub('',str(p))
  43. break
  44. url = 'http://wiki.nginx.org/NginxHttpCoreModule#%s'%(name.replace('$','.24'))
  45. if name != '':
  46. if len(sys.argv) == 1 or sys.argv[1].lower() == 'tsv':
  47. print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"%(name,'',url,description,synopsis[4:],'','nginxcoremodule','en')
  48. if sys.argv[1].lower() == 'sql':
  49. print '''INSERT INTO functions (`id`, `name`, `namespace`, `url`, `description`, `synopsis`, `detail`, `type`, `lang`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');'''%(name,'',url,description.replace("'","\\'"),synopsis.replace("'","\\'"),'nginx core module','nginxcoremodule','en')