PageRenderTime 78ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/build/lib.linux-x86_64-2.7/openerp/tools/mail.py

https://gitlab.com/reymor/odoo-version7
Python | 359 lines | 307 code | 13 blank | 39 comment | 3 complexity | 51a9d9a8f5cd5f5732edaea65db22460 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. ##############################################################################
  3. #
  4. # OpenERP, Open Source Business Applications
  5. # Copyright (C) 2012 OpenERP S.A. (<http://openerp.com>).
  6. #
  7. # This program is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU Affero General Public License as
  9. # published by the Free Software Foundation, either version 3 of the
  10. # License, or (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU Affero General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU Affero General Public License
  18. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. #
  20. ##############################################################################
  21. from lxml import etree
  22. import cgi
  23. import logging
  24. import lxml.html
  25. import lxml.html.clean as clean
  26. import openerp.pooler as pooler
  27. import random
  28. import re
  29. import socket
  30. import threading
  31. import time
  32. from openerp.loglevels import ustr
  33. _logger = logging.getLogger(__name__)
  34. #----------------------------------------------------------
  35. # HTML Sanitizer
  36. #----------------------------------------------------------
  37. tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
  38. tags_to_remove = ['html', 'body', 'font']
  39. def html_sanitize(src):
  40. if not src:
  41. return src
  42. src = ustr(src, errors='replace')
  43. # html encode email tags
  44. part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
  45. src = part.sub(lambda m: cgi.escape(m.group(1)), src)
  46. # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
  47. try:
  48. cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)
  49. cleaned = cleaner.clean_html(src)
  50. except TypeError, e:
  51. # lxml.clean version < 2.3.1 does not have a kill_tags attribute
  52. # to remove in 2014
  53. cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, remove_tags=tags_to_kill+tags_to_remove)
  54. cleaned = cleaner.clean_html(src)
  55. except:
  56. _logger.warning('html_sanitize failed to parse %s' % (src))
  57. cleaned = '<p>Impossible to parse</p>'
  58. return cleaned
  59. #----------------------------------------------------------
  60. # HTML Cleaner
  61. #----------------------------------------------------------
  62. def html_email_clean(html):
  63. """ html_email_clean: clean the html to display in the web client.
  64. - strip email quotes (remove blockquote nodes)
  65. - strip signatures (remove --\n{\n)Blahblah), by replacing <br> by
  66. \n to avoid ignoring signatures converted into html
  67. :param string html: sanitized html; tags like html or head should not
  68. be present in the html string. This method therefore takes as input
  69. html code coming from a sanitized source, like fields.html.
  70. """
  71. def _replace_matching_regex(regex, source, replace=''):
  72. dest = ''
  73. idx = 0
  74. for item in re.finditer(regex, source):
  75. dest += source[idx:item.start()] + replace
  76. idx = item.end()
  77. dest += source[idx:]
  78. return dest
  79. if not html or not isinstance(html, basestring):
  80. return html
  81. html = ustr(html)
  82. # 0. remove encoding attribute inside tags
  83. doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
  84. html = doctype.sub(r"", html)
  85. # 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
  86. br_tags = re.compile(r'([<]\s*[bB][rR]\s*\/?[>])')
  87. html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
  88. # 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
  89. root = lxml.html.fromstring(html)
  90. if not len(root) and root.text is None and root.tail is None:
  91. html = '<div>%s</div>' % html
  92. root = lxml.html.fromstring(html)
  93. # 2.5 remove quoted text in nodes
  94. quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
  95. for node in root.getiterator():
  96. if not node.text:
  97. continue
  98. node.text = _replace_matching_regex(quote_tags, node.text)
  99. # 3. remove blockquotes
  100. quotes = [el for el in root.getiterator(tag='blockquote')]
  101. for node in quotes:
  102. # copy the node tail into parent text
  103. if node.tail:
  104. parent = node.getparent()
  105. parent.text = parent.text or '' + node.tail
  106. # remove the node
  107. node.getparent().remove(node)
  108. # 4. strip signatures
  109. signature = re.compile(r'([-]{2}[\s]?[\r\n]{1,2}[^\z]+)')
  110. for elem in root.getiterator():
  111. if elem.text:
  112. match = re.search(signature, elem.text)
  113. if match:
  114. elem.text = elem.text[:match.start()] + elem.text[match.end():]
  115. if elem.tail:
  116. match = re.search(signature, elem.tail)
  117. if match:
  118. elem.tail = elem.tail[:match.start()] + elem.tail[match.end():]
  119. # 5. \n back to <br/>
  120. html = etree.tostring(root, pretty_print=True)
  121. html = html.replace('__BR_TAG__', '<br />')
  122. # 6. Misc cleaning :
  123. # - ClEditor seems to love using <div><br /><div> -> replace with <br />
  124. br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)')
  125. html = _replace_matching_regex(br_div_tags, html, '<br />')
  126. return html
  127. #----------------------------------------------------------
  128. # HTML/Text management
  129. #----------------------------------------------------------
  130. def html2plaintext(html, body_id=None, encoding='utf-8'):
  131. """ From an HTML text, convert the HTML to plain text.
  132. If @param body_id is provided then this is the tag where the
  133. body (not necessarily <body>) starts.
  134. """
  135. ## (c) Fry-IT, www.fry-it.com, 2007
  136. ## <peter@fry-it.com>
  137. ## download here: http://www.peterbe.com/plog/html2plaintext
  138. html = ustr(html)
  139. tree = etree.fromstring(html, parser=etree.HTMLParser())
  140. if body_id is not None:
  141. source = tree.xpath('//*[@id=%s]' % (body_id,))
  142. else:
  143. source = tree.xpath('//body')
  144. if len(source):
  145. tree = source[0]
  146. url_index = []
  147. i = 0
  148. for link in tree.findall('.//a'):
  149. url = link.get('href')
  150. if url:
  151. i += 1
  152. link.tag = 'span'
  153. link.text = '%s [%s]' % (link.text, i)
  154. url_index.append(url)
  155. html = ustr(etree.tostring(tree, encoding=encoding))
  156. # \r char is converted into &#13;, must remove it
  157. html = html.replace('&#13;', '')
  158. html = html.replace('<strong>', '*').replace('</strong>', '*')
  159. html = html.replace('<b>', '*').replace('</b>', '*')
  160. html = html.replace('<h3>', '*').replace('</h3>', '*')
  161. html = html.replace('<h2>', '**').replace('</h2>', '**')
  162. html = html.replace('<h1>', '**').replace('</h1>', '**')
  163. html = html.replace('<em>', '/').replace('</em>', '/')
  164. html = html.replace('<tr>', '\n')
  165. html = html.replace('</p>', '\n')
  166. html = re.sub('<br\s*/?>', '\n', html)
  167. html = re.sub('<.*?>', ' ', html)
  168. html = html.replace(' ' * 2, ' ')
  169. # strip all lines
  170. html = '\n'.join([x.strip() for x in html.splitlines()])
  171. html = html.replace('\n' * 2, '\n')
  172. for i, url in enumerate(url_index):
  173. if i == 0:
  174. html += '\n\n'
  175. html += ustr('[%s] %s\n') % (i + 1, url)
  176. return html
  177. def plaintext2html(text, container_tag=False):
  178. """ Convert plaintext into html. Content of the text is escaped to manage
  179. html entities, using cgi.escape().
  180. - all \n,\r are replaced by <br />
  181. - enclose content into <p>
  182. - 2 or more consecutive <br /> are considered as paragraph breaks
  183. :param string container_tag: container of the html; by default the
  184. content is embedded into a <div>
  185. """
  186. text = cgi.escape(ustr(text))
  187. # 1. replace \n and \r
  188. text = text.replace('\n', '<br/>')
  189. text = text.replace('\r', '<br/>')
  190. # 2-3: form paragraphs
  191. idx = 0
  192. final = '<p>'
  193. br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
  194. for item in re.finditer(br_tags, text):
  195. final += text[idx:item.start()] + '</p><p>'
  196. idx = item.end()
  197. final += text[idx:] + '</p>'
  198. # 4. container
  199. if container_tag:
  200. final = '<%s>%s</%s>' % (container_tag, final, container_tag)
  201. return ustr(final)
  202. def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
  203. """ Append extra content at the end of an HTML snippet, trying
  204. to locate the end of the HTML document (</body>, </html>, or
  205. EOF), and converting the provided content in html unless ``plaintext``
  206. is False.
  207. Content conversion can be done in two ways:
  208. - wrapping it into a pre (preserve=True)
  209. - use plaintext2html (preserve=False, using container_tag to wrap the
  210. whole content)
  211. A side-effect of this method is to coerce all HTML tags to
  212. lowercase in ``html``, and strip enclosing <html> or <body> tags in
  213. content if ``plaintext`` is False.
  214. :param str html: html tagsoup (doesn't have to be XHTML)
  215. :param str content: extra content to append
  216. :param bool plaintext: whether content is plaintext and should
  217. be wrapped in a <pre/> tag.
  218. :param bool preserve: if content is plaintext, wrap it into a <pre>
  219. instead of converting it into html
  220. """
  221. html = ustr(html)
  222. if plaintext and preserve:
  223. content = u'\n<pre>%s</pre>\n' % ustr(content)
  224. elif plaintext:
  225. content = '\n%s\n' % plaintext2html(content, container_tag)
  226. else:
  227. content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
  228. content = u'\n%s\n' % ustr(content)
  229. # Force all tags to lowercase
  230. html = re.sub(r'(</?)\W*(\w+)([ >])',
  231. lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
  232. insert_location = html.find('</body>')
  233. if insert_location == -1:
  234. insert_location = html.find('</html>')
  235. if insert_location == -1:
  236. return '%s%s' % (html, content)
  237. return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
  238. #----------------------------------------------------------
  239. # Emails
  240. #----------------------------------------------------------
# Matches an e-mail address; group(1) is the whole address. The username needs
# at least two characters and must start with a letter; the address must end
# with a dot followed by 2 or 3 lowercase letters.
email_re = re.compile(r"""
([a-zA-Z][\w\.-]*[a-zA-Z0-9] # username part
@ # mandatory @ sign
[a-zA-Z0-9][\w\.-]* # domain must start with a letter ... Ged> why do we include a 0-9 then?
\.
[a-z]{2,3} # TLD
)
""", re.VERBOSE)

# Matches a number between square brackets, e.g. "[42]"; group(1) = the digits
res_re = re.compile(r"\[([0-9]+)\]", re.UNICODE)

# Matches a "Set-<name>: <value>" line (case-insensitive);
# group(1) = the name ; group(2) = the value
command_re = re.compile("^Set-([a-z]+) *: *(.+)$", re.I + re.UNICODE)

# Updated in 7.0 to match the model name as well
# Typical form of references is <timestamp-openerp-record_id-model_name@domain>
# group(1) = the record ID ; group(2) = the model (if any) ; group(3) = the domain
reference_re = re.compile("<.*-open(?:object|erp)-(\\d+)(?:-([\w.]+))?.*@(.*)>", re.UNICODE)
  255. def generate_tracking_message_id(res_id):
  256. """Returns a string that can be used in the Message-ID RFC822 header field
  257. Used to track the replies related to a given object thanks to the "In-Reply-To"
  258. or "References" fields that Mail User Agents will set.
  259. """
  260. try:
  261. rnd = random.SystemRandom().random()
  262. except NotImplementedError:
  263. rnd = random.random()
  264. rndstr = ("%.15f" % rnd)[2:]
  265. return "<%.15f.%s-openerp-%s@%s>" % (time.time(), rndstr, res_id, socket.gethostname())
  266. def email_send(email_from, email_to, subject, body, email_cc=None, email_bcc=None, reply_to=False,
  267. attachments=None, message_id=None, references=None, openobject_id=False, debug=False, subtype='plain', headers=None,
  268. smtp_server=None, smtp_port=None, ssl=False, smtp_user=None, smtp_password=None, cr=None, uid=None):
  269. """Low-level function for sending an email (deprecated).
  270. :deprecate: since OpenERP 6.1, please use ir.mail_server.send_email() instead.
  271. :param email_from: A string used to fill the `From` header, if falsy,
  272. config['email_from'] is used instead. Also used for
  273. the `Reply-To` header if `reply_to` is not provided
  274. :param email_to: a sequence of addresses to send the mail to.
  275. """
  276. # If not cr, get cr from current thread database
  277. local_cr = None
  278. if not cr:
  279. db_name = getattr(threading.currentThread(), 'dbname', None)
  280. if db_name:
  281. local_cr = cr = pooler.get_db(db_name).cursor()
  282. else:
  283. raise Exception("No database cursor found, please pass one explicitly")
  284. # Send Email
  285. try:
  286. mail_server_pool = pooler.get_pool(cr.dbname).get('ir.mail_server')
  287. res = False
  288. # Pack Message into MIME Object
  289. email_msg = mail_server_pool.build_email(email_from, email_to, subject, body, email_cc, email_bcc, reply_to,
  290. attachments, message_id, references, openobject_id, subtype, headers=headers)
  291. res = mail_server_pool.send_email(cr, uid or 1, email_msg, mail_server_id=None,
  292. smtp_server=smtp_server, smtp_port=smtp_port, smtp_user=smtp_user, smtp_password=smtp_password,
  293. smtp_encryption=('ssl' if ssl else None), smtp_debug=debug)
  294. except Exception:
  295. _logger.exception("tools.email_send failed to deliver email")
  296. return False
  297. finally:
  298. if local_cr:
  299. cr.close()
  300. return res
  301. def email_split(text):
  302. """ Return a list of the email addresses found in ``text`` """
  303. if not text:
  304. return []
  305. return re.findall(r'([^ ,<@]+@[^> ,]+)', text)