PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/src/pentest/metagoofil/parser.py

https://github.com/sullivanmatt/Raspberry-Pwn
Python | 104 lines | 101 code | 3 blank | 0 comment | 0 complexity | 72b8e52886b6a187480076ace12fd1bb MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, MPL-2.0-no-copyleft-exception, GPL-2.0, GPL-3.0
  1. import string
  2. import re
  3. class parser:
  4. def __init__(self,results,word,file):
  5. self.results=results
  6. self.word=word
  7. self.temp=[]
  8. self.file=file
  9. def genericClean(self):
  10. self.results = re.sub('<em>', '', self.results)
  11. self.results = re.sub('<b>', '', self.results)
  12. self.results = re.sub('</b>', '', self.results)
  13. self.results = re.sub('</em>', '', self.results)
  14. self.results = re.sub('%2f', ' ', self.results)
  15. self.results = re.sub('%3a', ' ', self.results)
  16. self.results = re.sub('<strong>', '', self.results)
  17. self.results = re.sub('</strong>', '', self.results)
  18. for e in ('>',':','=', '<', '/', '\\',';','&','%3A','%3D','%3C'):
  19. self.results = string.replace(self.results, e, ' ')
  20. def urlClean(self):
  21. self.results = re.sub('<em>', '', self.results)
  22. self.results = re.sub('</em>', '', self.results)
  23. self.results = re.sub('%2f', ' ', self.results)
  24. self.results = re.sub('%3a', ' ', self.results)
  25. for e in ('<','>',':','=',';','&','%3A','%3D','%3C'):
  26. self.results = string.replace(self.results, e, ' ')
  27. def emails(self):
  28. self.genericClean()
  29. reg_emails = re.compile('[a-zA-Z0-9.-_]*' + '@' + '[a-zA-Z0-9.-]*' + self.word)
  30. self.temp = reg_emails.findall(self.results)
  31. emails=self.unique()
  32. return emails
  33. def fileurls(self):
  34. urls=[]
  35. reg_urls = re.compile('<a href="(.*?)"')
  36. self.temp = reg_urls.findall(self.results)
  37. allurls=self.unique()
  38. for x in allurls:
  39. if x.count('webcache') or x.count('google.com') or x.count('search?'):
  40. pass
  41. else:
  42. urls.append(x)
  43. return urls
  44. def people_linkedin(self):
  45. reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
  46. self.temp = reg_people.findall(self.results)
  47. resul = []
  48. for x in self.temp:
  49. y = string.replace(x, ' LinkedIn', '')
  50. y = string.replace(y, ' profiles ', '')
  51. y = string.replace(y, 'LinkedIn', '')
  52. y = string.replace(y, '"', '')
  53. y = string.replace(y, '>', '')
  54. if y !=" ":
  55. resul.append(y)
  56. return resul
  57. def profiles(self):
  58. reg_people = re.compile('">[a-zA-Z0-9._ -]* - <em>Google Profile</em>')
  59. self.temp = reg_people.findall(self.results)
  60. resul = []
  61. for x in self.temp:
  62. y = string.replace(x, ' <em>Google Profile</em>', '')
  63. y = string.replace(y, '-', '')
  64. y = string.replace(y, '">', '')
  65. if y !=" ":
  66. resul.append(y)
  67. return resul
  68. def hostnames(self):
  69. self.genericClean()
  70. reg_hosts = re.compile('[a-zA-Z0-9.-]*\.'+ self.word)
  71. self.temp = reg_hosts.findall(self.results)
  72. hostnames=self.unique()
  73. return hostnames
  74. def hostnames_all(self):
  75. reg_hosts = re.compile('<cite>(.*?)</cite>')
  76. temp = reg_hosts.findall(self.results)
  77. for x in temp:
  78. if x.count(':'):
  79. res=x.split(':')[1].split('/')[2]
  80. else:
  81. res=x.split("/")[0]
  82. self.temp.append(res)
  83. hostnames=self.unique()
  84. return hostnames
  85. def unique(self):
  86. self.new=[]
  87. for x in self.temp:
  88. if x not in self.new:
  89. self.new.append(x)
  90. return self.new