/intelmq/bots/collectors/mail/collector_mail_url.py

https://gitlab.com/mayakarya/intelmq · Python · 97 lines · 70 code · 22 blank · 5 comment · 17 complexity · f4088eff26dae622f21da0d00f7d99b3 MD5 · raw file

  1. # -*- coding: utf-8 -*-
  2. import re
  3. import sys
  4. try:
  5. import imbox
  6. except ImportError:
  7. imbox = None
  8. import requests
  9. from intelmq.lib.bot import Bot
  10. from intelmq.lib.message import Report
  11. class MailURLCollectorBot(Bot):
  12. def init(self):
  13. if imbox is None:
  14. self.logger.error('Could not import imbox. Please install it.')
  15. self.stop()
  16. def process(self):
  17. mailbox = imbox.Imbox(self.parameters.mail_host,
  18. self.parameters.mail_user,
  19. self.parameters.mail_password,
  20. self.parameters.mail_ssl)
  21. emails = mailbox.messages(folder=self.parameters.folder, unread=True)
  22. if emails:
  23. for uid, message in emails:
  24. if (self.parameters.subject_regex and
  25. not re.search(self.parameters.subject_regex,
  26. message.subject)):
  27. continue
  28. self.logger.info("Reading email report")
  29. for body in message.body['plain']:
  30. match = re.search(self.parameters.url_regex, str(body))
  31. if match:
  32. url = match.group()
  33. url = url.strip() # strip leading and trailing spaces, newlines and carriage returns
  34. # Build request
  35. self.http_header = getattr(self.parameters, 'http_header', {})
  36. self.http_verify_cert = getattr(self.parameters,
  37. 'http_verify_cert', True)
  38. if hasattr(self.parameters, 'http_user') and hasattr(
  39. self.parameters, 'http_password'):
  40. self.auth = (self.parameters.http_user,
  41. self.parameters.http_password)
  42. else:
  43. self.auth = None
  44. http_proxy = getattr(self.parameters, 'http_proxy', None)
  45. https_proxy = getattr(self.parameters,
  46. 'http_ssl_proxy', None)
  47. if http_proxy and https_proxy:
  48. self.proxy = {'http': http_proxy, 'https': https_proxy}
  49. else:
  50. self.proxy = None
  51. self.http_header['User-agent'] = self.parameters.http_user_agent
  52. self.logger.info("Downloading report from %s" % url)
  53. resp = requests.get(url=url,
  54. auth=self.auth, proxies=self.proxy,
  55. headers=self.http_header,
  56. verify=self.http_verify_cert)
  57. if resp.status_code // 100 != 2:
  58. raise ValueError('HTTP response status code was {}.'
  59. ''.format(resp.status_code))
  60. self.logger.info("Report downloaded.")
  61. report = Report()
  62. report.add("raw", resp.content)
  63. report.add("feed.name",
  64. self.parameters.feed)
  65. report.add("feed.accuracy", self.parameters.accuracy)
  66. self.send_message(report)
  67. # Only mark read if message relevant to this instance,
  68. # so other instances watching this mailbox will still
  69. # check it.
  70. mailbox.mark_seen(uid)
  71. self.logger.info("Email report read")
  72. mailbox.logout()
  73. if __name__ == "__main__":
  74. bot = MailURLCollectorBot(sys.argv[1])
  75. bot.start()