PageRenderTime 46ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/src/pentest/theharvester/discovery/exaleadsearch.py

https://github.com/sullivanmatt/Raspberry-Pwn
Python | 78 lines | 77 code | 1 blank | 0 comment | 0 complexity | 9ba72226f4d400e8eb9b2dd25583b1b5 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, MPL-2.0-no-copyleft-exception, GPL-2.0, GPL-3.0
  1. import string
  2. import httplib, sys
  3. import parser
  4. import re
  5. import time
  6. class search_exalead:
  7. def __init__(self,word,limit,start):
  8. self.word=word
  9. self.files="pdf"
  10. self.results=""
  11. self.totalresults=""
  12. self.server="www.exalead.com"
  13. self.hostname="www.exalead.com"
  14. self.userAgent="(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
  15. self.limit=limit
  16. self.counter=start
  17. def do_search(self):
  18. h = httplib.HTTP(self.server)
  19. h.putrequest('GET', "/search/web/results/?q=%40"+ self.word + "&elements_per_page=100&start_index="+str(self.counter))
  20. h.putheader('Host', self.hostname)
  21. h.putheader('User-agent', self.userAgent)
  22. h.endheaders()
  23. returncode, returnmsg, headers = h.getreply()
  24. self.results = h.getfile().read()
  25. self.totalresults+= self.results
  26. def do_search_files(self,files):
  27. h = httplib.HTTP(self.server)
  28. h.putrequest('GET', "search/web/results/?q="+ self.word + "filetype:"+ self.files +"&elements_per_page=100&start_index="+self.counter)
  29. h.putheader('Host', self.hostname)
  30. h.putheader('User-agent', self.userAgent)
  31. h.endheaders()
  32. returncode, returnmsg, headers = h.getreply()
  33. self.results = h.getfile().read()
  34. self.totalresults+= self.results
  35. def check_next(self):
  36. renext = re.compile('topNextUrl')
  37. nextres=renext.findall(self.results)
  38. if nextres !=[]:
  39. nexty="1"
  40. print str(self.counter)
  41. else:
  42. nexty="0"
  43. return nexty
  44. def get_emails(self):
  45. rawres=parser.parser(self.totalresults,self.word)
  46. return rawres.emails()
  47. def get_hostnames(self):
  48. rawres=parser.parser(self.totalresults,self.word)
  49. return rawres.hostnames()
  50. def get_files(self):
  51. rawres=parser.parser(self.totalresults,self.word)
  52. return rawres.fileurls(self.files)
  53. def process(self):
  54. while self.counter <= self.limit:
  55. self.do_search()
  56. self.counter+=100
  57. print "\tSearching " + str(self.counter) + " results..."
  58. def process_files(self,files):
  59. while self.counter < self.limit:
  60. self.do_search_files(files)
  61. time.sleep(1)
  62. more = self.check_next()
  63. if more == "1":
  64. self.counter+=100
  65. else:
  66. break