/Chapter09/search_opportunities.py

https://github.com/PacktPublishing/Python-Automation-Cookbook · Python · 150 lines · 89 code · 29 blank · 32 comment · 11 complexity · 0c6fee282ee6c2898ec9385d50e89ae9 MD5 · raw file

  1. ############
  2. # IMPORTS
  3. ############
  4. import argparse
  5. import configparser
  6. import feedparser
  7. import datetime
  8. import delorean
  9. import requests
  10. from bs4 import BeautifulSoup
  11. import mistune
  12. import jinja2
  13. from collections import namedtuple
  14. import smtplib
  15. from email.mime.multipart import MIMEMultipart
  16. from email.mime.text import MIMEText
  17. # Group the email configuration parameters
  18. # Note the 'from_' to avoid using a reserved Python keyword (from)
  19. EmailConfig = namedtuple('EmailConfig', ['user', 'password', 'from_', 'to'])
  20. ############
  21. # READ TEMPLATES INTO MEMORY
  22. ############
  23. # Get the email templates from hard disk
  24. EMAIL_TEMPLATE_FILE = 'email_template.md'
  25. EMAIL_STYLING_FILE = 'email_styling.html'
  26. with open(EMAIL_TEMPLATE_FILE) as md_file:
  27. EMAIL_TEMPLATE = md_file.read()
  28. with open(EMAIL_STYLING_FILE) as html_file:
  29. EMAIL_STYLING = html_file.read()
  30. def get_articles(keywords, feeds):
  31. '''
  32. Retrieve a list of articles from the feeds that contain the keywords
  33. Each article is returned in the format:
  34. (title, summary, link)
  35. '''
  36. articles = []
  37. for feed in feeds:
  38. rss = feedparser.parse(feed)
  39. updated_time = rss.get('updated', str(datetime.datetime.utcnow()))
  40. # Only get the articles published in the last 7 days
  41. time_limit = delorean.parse(updated_time) - datetime.timedelta(days=7)
  42. for entry in rss.entries:
  43. # Normalise the time
  44. entry_time = delorean.parse(entry.published)
  45. entry_time.shift('UTC')
  46. if entry_time < time_limit:
  47. # Skip this entry
  48. continue
  49. # Get the article
  50. response = requests.get(entry.link)
  51. article = BeautifulSoup(response.text, 'html.parser')
  52. article_reference = (article.title.string.strip(),
  53. entry.summary.strip(),
  54. entry.link)
  55. article_text = article.get_text()
  56. for keyword in keywords:
  57. # match with the keyword. Notice the lower on both to
  58. # make it case-insensitive
  59. if keyword.lower() in article_text.lower():
  60. articles.append(article_reference)
  61. break
  62. return articles
  63. def compose_email_body(articles, keywords, feed_list):
  64. '''
  65. From the list of articles, keywords and feeds, fill the email template
  66. Set the list in the adequate format for the template
  67. '''
  68. # Compose the list of articles
  69. ARTICLE_TEMPLATE = '* **{title}** {summary}: {link}'
  70. article_list = [ARTICLE_TEMPLATE.format(title=title, summary=summary,
  71. link=link)
  72. for title, summary, link in articles]
  73. data = {
  74. 'article_list': '\n'.join(article_list),
  75. 'keywords': ', '.join(keywords),
  76. 'feed_list': ', '.join(feed_list),
  77. }
  78. text = EMAIL_TEMPLATE.format(**data)
  79. html_content = mistune.markdown(text)
  80. html = jinja2.Template(EMAIL_STYLING).render(content=html_content)
  81. return text, html
  82. def send_email(email_config, text_body, html_body):
  83. '''
  84. Send an email with the text and html body, using the parameters
  85. configured in email_config
  86. '''
  87. msg = MIMEMultipart('alternative')
  88. msg['Subject'] = 'Weekly report'
  89. msg['From'] = email_config.from_
  90. msg['To'] = email_config.to
  91. part_plain = MIMEText(text_body, 'plain')
  92. part_html = MIMEText(html_body, 'html')
  93. msg.attach(part_plain)
  94. msg.attach(part_html)
  95. with smtplib.SMTP('smtp.gmail.com', 587) as server:
  96. server.starttls()
  97. server.login(email_config.user, email_config.password)
  98. server.sendmail(email_config.from_, [email_config.to], msg.as_string())
  99. def main(keywords, feeds, email_config):
  100. articles = get_articles(keywords, feeds)
  101. text_body, html_body = compose_email_body(articles, keywords, feeds)
  102. send_email(email_config, text_body, html_body)
  103. if __name__ == '__main__':
  104. parser = argparse.ArgumentParser()
  105. parser.add_argument(type=argparse.FileType('r'), dest='config',
  106. help='config file')
  107. args = parser.parse_args()
  108. config = configparser.ConfigParser()
  109. config.read_file(args.config)
  110. keywords = config['SEARCH']['keywords'].split(',')
  111. feeds = [feed.strip() for feed in config['SEARCH']['feeds'].split(',')]
  112. email_user = config['EMAIL']['user']
  113. email_password = config['EMAIL']['password']
  114. email_from = config['EMAIL']['from']
  115. email_to = config['EMAIL']['to']
  116. email_config = EmailConfig(email_user, email_password, email_from,
  117. email_to)
  118. main(keywords, feeds, email_config)