PageRenderTime 33ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/pre-i18n/mailman/Mailman/Bouncers/Catchall.py

#
Python | 194 lines | 138 code | 16 blank | 40 comment | 36 complexity | 30a254052f4f58330831df211f129311 MD5 | raw file
Possible License(s): GPL-2.0
  1. # Copyright (C) 1998 by the Free Software Foundation, Inc.
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  16. # TBD: this is cruft and should eventually just go away. It contains the old
  17. # implementation of Bouncer.ScanMessage(). We keep it because I don't feel
  18. # like splitting it up and porting it. It should at the very least be ported
  19. # to use mimetools and re. :(
  20. import re
  21. import string
  22. import regsub
  23. import regex
  24. from types import StringType
  25. # Return 0 if we couldn't make any sense of it, 1 if we handled it.
  26. def process(mlist, msg):
  27. candidates = []
  28. # See Mailman.Message.GetSender :(
  29. sender = msg.get('sender')
  30. if sender:
  31. name, addr = msg.getaddr('sender')
  32. else:
  33. name, addr = msg.getaddr('from')
  34. if addr and type(addr) == StringType:
  35. who_info = string.lower(addr)
  36. elif msg.unixfrom:
  37. who_info = string.lower(string.split(msg.unixfrom)[1])
  38. else:
  39. return None
  40. at_index = string.find(who_info, '@')
  41. if at_index != -1:
  42. who_from = who_info[:at_index]
  43. remote_host = who_info[at_index+1:]
  44. else:
  45. who_from = who_info
  46. remote_host = mlist.host_name
  47. if not who_from in ['mailer-daemon', 'postmaster', 'orphanage',
  48. 'postoffice', 'ucx_smtp', 'a2']:
  49. return 0
  50. mime_info = msg.getheader('content-type')
  51. boundry = None
  52. if mime_info:
  53. mime_info_parts = regsub.splitx(
  54. mime_info, '[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"')
  55. if len(mime_info_parts) > 1:
  56. boundry = regsub.splitx(mime_info_parts[1],
  57. '"[^"]+"')[1][1:-1]
  58. # snag out the message body
  59. msg.rewindbody()
  60. msgbody = msg.fp.read()
  61. if boundry:
  62. relevant_text = string.split(msgbody, '--%s' % boundry)
  63. # Invalid MIME messages shouldn't cause exceptions
  64. if len(relevant_text) >= 2:
  65. relevant_text = relevant_text[1]
  66. else:
  67. relevant_text = relevant_text[0]
  68. else:
  69. # This looks strange, but at least 2 are going to be no-ops.
  70. relevant_text = regsub.split(msgbody,
  71. '^.*Message header follows.*$')[0]
  72. relevant_text = regsub.split(relevant_text,
  73. '^The text you sent follows:.*$')[0]
  74. relevant_text = regsub.split(
  75. relevant_text, '^Additional Message Information:.*$')[0]
  76. relevant_text = regsub.split(relevant_text,
  77. '^-+Your original message-+.*$')[0]
  78. BOUNCE = 1
  79. REMOVE = 2
  80. # Bounce patterns where it's simple to figure out the email addr.
  81. email_regexp = '<?\([^ \t@|<>]+@[^ \t@<>]+\.[^ \t<>.]+\)>?'
  82. simple_bounce_pats = (
  83. (regex.compile('.*451 %s.*' % email_regexp), BOUNCE),
  84. (regex.compile('.*554 %s.*' % email_regexp), BOUNCE),
  85. (regex.compile('.*552 %s.*' % email_regexp), BOUNCE),
  86. (regex.compile('.*501 %s.*' % email_regexp), BOUNCE),
  87. (regex.compile('.*553 %s.*' % email_regexp), BOUNCE),
  88. (regex.compile('.*550 %s.*' % email_regexp), BOUNCE),
  89. (regex.compile('%s .bounced.*' % email_regexp), BOUNCE),
  90. (regex.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE),
  91. (regex.compile('.*User %s not known.*' % email_regexp), REMOVE),
  92. (regex.compile('.*%s: User unknown.*' % email_regexp), REMOVE),
  93. (regex.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE))
  94. # patterns we can't directly extract the email (special case these)
  95. messy_pattern_1 = regex.compile('^Recipient .*$')
  96. messy_pattern_2 = regex.compile('^Addressee: .*$')
  97. messy_pattern_3 = regex.compile('^User .* not listed.*$')
  98. messy_pattern_4 = regex.compile('^550 [^ ]+\.\.\. User unknown.*$')
  99. messy_pattern_5 = regex.compile('^User [^ ]+ is not defined.*$')
  100. messy_pattern_6 = regex.compile('^[ \t]*[^ ]+: User unknown.*$')
  101. messy_pattern_7 = regex.compile('^[^ ]+ - User currently disabled.*$')
  102. # Patterns for cases where email addr is separate from error cue.
  103. separate_cue_1 = re.compile(
  104. '^554 [^ ]+\.\.\. unknown mailer error.*$', re.I)
  105. separate_addr_1 = regex.compile('expanded from: %s' % email_regexp)
  106. message_grokked = 0
  107. use_prospects = 0
  108. prospects = [] # If bad but no candidates found.
  109. for line in string.split(relevant_text, '\n'):
  110. for pattern, action in simple_bounce_pats:
  111. if pattern.match(line) <> -1:
  112. email = extract(line)
  113. candidates.append((string.split(email,',')[0], action))
  114. message_grokked = 1
  115. # Now for the special case messages that are harder to parse...
  116. if (messy_pattern_1.match(line) <> -1
  117. or messy_pattern_2.match(line) <> -1):
  118. username = string.split(line)[1]
  119. candidates.append(('%s@%s' % (username, remote_host),
  120. BOUNCE))
  121. message_grokked = 1
  122. continue
  123. if (messy_pattern_3.match(line) <> -1
  124. or messy_pattern_4.match(line) <> -1
  125. or messy_pattern_5.match(line) <> -1):
  126. username = string.split(line)[1]
  127. candidates.append(('%s@%s' % (username, remote_host),
  128. REMOVE))
  129. message_grokked = 1
  130. continue
  131. if messy_pattern_6.match(line) <> -1:
  132. username = string.split(string.strip(line))[0][:-1]
  133. candidates.append(('%s@%s' % (username, remote_host),
  134. REMOVE))
  135. message_grokked = 1
  136. continue
  137. if messy_pattern_7.match(line) <> -1:
  138. username = string.split(string.strip(line))[0]
  139. candidates.append(('%s@%s' % (username, remote_host),
  140. REMOVE))
  141. message_grokked = 1
  142. continue
  143. if separate_cue_1.match(line):
  144. # Here's an error message that doesn't contain the addr.
  145. # Set a flag to use prospects found on separate lines.
  146. use_prospects = 1
  147. if separate_addr_1.search(line) != -1:
  148. # Found an addr that *might* be part of an error message.
  149. # Register it on prospects, where it will only be used if a
  150. # separate check identifies this message as an error message.
  151. prospects.append((separate_addr_1.group(1), BOUNCE))
  152. if use_prospects and prospects:
  153. candidates = candidates + prospects
  154. did = []
  155. for who, action in candidates:
  156. # First clean up some cruft around the addrs.
  157. el = string.find(who, "...")
  158. if el != -1:
  159. who = who[:el]
  160. if len(who) > 1 and who[0] == '<':
  161. # Use stuff after open angle and before (optional) close:
  162. who = regsub.splitx(who[1:], ">")[0]
  163. if who not in did:
  164. ## if action == REMOVE:
  165. ## mlist.HandleBouncingAddress(who, msg)
  166. ## else:
  167. ## mlist.RegisterBounce(who, msg)
  168. did.append(who)
  169. ## return message_grokked
  170. return did
  171. def extract(line):
  172. email = regsub.splitx(line, '[^ \t@<>]+@[^ \t@<>]+\.[^ \t<>.]+')[1]
  173. if email[0] == '<':
  174. return regsub.splitx(email[1:], ">")[0]
  175. else:
  176. return email