PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/branches/exp-kid-templates/Mailman/Bouncers/SimpleMatch.py

#
Python | 165 lines | 98 code | 7 blank | 60 comment | 10 complexity | f019a7080800b5881467996137588476 MD5 | raw file
Possible License(s): GPL-2.0
  1. # Copyright (C) 1998-2006 by the Free Software Foundation, Inc.
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  16. # USA.
  17. """Recognizes simple heuristically delimited bounces."""
  18. import re
  19. import email.Iterators
  20. def _c(pattern):
  21. return re.compile(pattern, re.IGNORECASE)
  22. # This is a list of tuples of the form
  23. #
  24. # (start cre, end cre, address cre)
  25. #
  26. # where `cre' means compiled regular expression, start is the line just before
  27. # the bouncing address block, end is the line just after the bouncing address
  28. # block, and address cre is the regexp that will recognize the addresses. It
  29. # must have a group called `addr' which will contain exactly and only the
  30. # address that bounced.
  31. PATTERNS = [
  32. # sdm.de
  33. (_c('here is your list of failed recipients'),
  34. _c('here is your returned mail'),
  35. _c(r'<(?P<addr>[^>]*)>')),
  36. # sz-sb.de, corridor.com, nfg.nl
  37. (_c('the following addresses had'),
  38. _c('transcript of session follows'),
  39. _c(r'<(?P<fulladdr>[^>]*)>|\(expanded from: <?(?P<addr>[^>)]*)>?\)')),
  40. # robanal.demon.co.uk
  41. (_c('this message was created automatically by mail delivery software'),
  42. _c('original message follows'),
  43. _c('rcpt to:\s*<(?P<addr>[^>]*)>')),
  44. # s1.com (InterScan E-Mail VirusWall NT ???)
  45. (_c('message from interscan e-mail viruswall nt'),
  46. _c('end of message'),
  47. _c('rcpt to:\s*<(?P<addr>[^>]*)>')),
  48. # Smail
  49. (_c('failed addresses follow:'),
  50. _c('message text follows:'),
  51. _c(r'\s*(?P<addr>\S+@\S+)')),
  52. # newmail.ru
  53. (_c('This is the machine generated message from mail service.'),
  54. _c('--- Below the next line is a copy of the message.'),
  55. _c('<(?P<addr>[^>]*)>')),
  56. # turbosport.com runs something called `MDaemon 3.5.2' ???
  57. (_c('The following addresses did NOT receive a copy of your message:'),
  58. _c('--- Session Transcript ---'),
  59. _c('[>]\s*(?P<addr>.*)$')),
  60. # usa.net
  61. (_c('Intended recipient:\s*(?P<addr>.*)$'),
  62. _c('--------RETURNED MAIL FOLLOWS--------'),
  63. _c('Intended recipient:\s*(?P<addr>.*)$')),
  64. # hotpop.com
  65. (_c('Undeliverable Address:\s*(?P<addr>.*)$'),
  66. _c('Original message attached'),
  67. _c('Undeliverable Address:\s*(?P<addr>.*)$')),
  68. # Another demon.co.uk format
  69. (_c('This message was created automatically by mail delivery'),
  70. _c('^---- START OF RETURNED MESSAGE ----'),
  71. _c("addressed to '(?P<addr>[^']*)'")),
  72. # Prodigy.net full mailbox
  73. (_c("User's mailbox is full:"),
  74. _c('Unable to deliver mail.'),
  75. _c("User's mailbox is full:\s*<(?P<addr>[^>]*)>")),
  76. # Microsoft SMTPSVC
  77. (_c('The email below could not be delivered to the following user:'),
  78. _c('Old message:'),
  79. _c('<(?P<addr>[^>]*)>')),
  80. # Yahoo on behalf of other domains like sbcglobal.net
  81. (_c('Unable to deliver message to the following address\(es\)\.'),
  82. _c('--- Original message follows\.'),
  83. _c('<(?P<addr>[^>]*)>:')),
  84. # kundenserver.de
  85. (_c('A message that you sent could not be delivered'),
  86. _c('^---'),
  87. _c('<(?P<addr>[^>]*)>')),
  88. # another kundenserver.de
  89. (_c('A message that you sent could not be delivered'),
  90. _c('^---'),
  91. _c('^(?P<addr>[^\s@]+@[^\s@:]+):')),
  92. # thehartford.com
  93. (_c('Delivery to the following recipients failed'),
  94. _c("Bogus - there actually isn't anything"),
  95. _c('^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
  96. # and another thehartfod.com/hartfordlife.com
  97. (_c('^Your message\s*$'),
  98. _c('^because:'),
  99. _c('^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
  100. # kviv.be (NTMail)
  101. (_c('^Unable to deliver message to'),
  102. _c(r'\*+\s+End of message\s+\*+'),
  103. _c('<(?P<addr>[^>]*)>')),
  104. # earthlink.net supported domains
  105. (_c('^Sorry, unable to deliver your message to'),
  106. _c('^A copy of the original message'),
  107. _c('\s*(?P<addr>[^\s@]+@[^\s@]+)\s+')),
  108. # ademe.fr
  109. (_c('^A message could not be delivered to:'),
  110. _c('^Subject:'),
  111. _c('^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
  112. # andrew.ac.jp
  113. (_c('^Invalid final delivery userid:'),
  114. _c('^Original message follows.'),
  115. _c('\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
  116. # E500_SMTP_Mail_Service@lerctr.org
  117. (_c('------ Failed Recipients ------'),
  118. _c('-------- Returned Mail --------'),
  119. _c('<(?P<addr>[^>]*)>')),
  120. # cynergycom.net
  121. (_c('A message that you sent could not be delivered'),
  122. _c('^---'),
  123. _c('(?P<addr>[^\s@]+@[^\s@)]+)')),
  124. # Next one goes here...
  125. ]
  126. def process(msg, patterns=None):
  127. if patterns is None:
  128. patterns = PATTERNS
  129. # simple state machine
  130. # 0 = nothing seen yet
  131. # 1 = intro seen
  132. addrs = {}
  133. # MAS: This is a mess. The outer loop used to be over the message
  134. # so we only looped through the message once. Looping through the
  135. # message for each set of patterns is obviously way more work, but
  136. # if we don't do it, problems arise because scre from the wrong
  137. # pattern set matches first and then acre doesn't match. The
  138. # alternative is to split things into separate modules, but then
  139. # we process the message multiple times anyway.
  140. for scre, ecre, acre in patterns:
  141. state = 0
  142. for line in email.Iterators.body_line_iterator(msg):
  143. if state == 0:
  144. if scre.search(line):
  145. state = 1
  146. if state == 1:
  147. mo = acre.search(line)
  148. if mo:
  149. addr = mo.group('addr')
  150. if addr:
  151. addrs[mo.group('addr')] = 1
  152. elif ecre.search(line):
  153. break
  154. if addrs:
  155. break
  156. return addrs.keys()