PageRenderTime 84ms CodeModel.GetById 37ms RepoModel.GetById 0ms app.codeStats 0ms

/Tools/scripts/mailerdaemon.py

https://bitbucket.org/glix/python
Python | 237 lines | 225 code | 3 blank | 9 comment | 21 complexity | f80ed57c6a1c56ed5d35eefbd8009d49 MD5 | raw file
  1. """mailerdaemon - classes to parse mailer-daemon messages"""
  2. import rfc822
  3. import calendar
  4. import re
  5. import os
  6. import sys
  7. Unparseable = 'mailerdaemon.Unparseable'
  8. class ErrorMessage(rfc822.Message):
  9. def __init__(self, fp):
  10. rfc822.Message.__init__(self, fp)
  11. self.sub = ''
  12. def is_warning(self):
  13. sub = self.getheader('Subject')
  14. if not sub:
  15. return 0
  16. sub = sub.lower()
  17. if sub.startswith('waiting mail'): return 1
  18. if 'warning' in sub: return 1
  19. self.sub = sub
  20. return 0
  21. def get_errors(self):
  22. for p in EMPARSERS:
  23. self.rewindbody()
  24. try:
  25. return p(self.fp, self.sub)
  26. except Unparseable:
  27. pass
  28. raise Unparseable
  29. # List of re's or tuples of re's.
  30. # If a re, it should contain at least a group (?P<email>...) which
  31. # should refer to the email address. The re can also contain a group
  32. # (?P<reason>...) which should refer to the reason (error message).
  33. # If no reason is present, the emparse_list_reason list is used to
  34. # find a reason.
  35. # If a tuple, the tuple should contain 2 re's. The first re finds a
  36. # location, the second re is repeated one or more times to find
  37. # multiple email addresses. The second re is matched (not searched)
  38. # where the previous match ended.
  39. # The re's are compiled using the re module.
  40. emparse_list_list = [
  41. 'error: (?P<reason>unresolvable): (?P<email>.+)',
  42. ('----- The following addresses had permanent fatal errors -----\n',
  43. '(?P<email>[^ \n].*)\n( .*\n)?'),
  44. 'remote execution.*\n.*rmail (?P<email>.+)',
  45. ('The following recipients did not receive your message:\n\n',
  46. ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
  47. '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
  48. '^<(?P<email>.*)>:\n(?P<reason>.*)',
  49. '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
  50. '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
  51. '^Original-Recipient: rfc822;(?P<email>.*)',
  52. '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
  53. '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
  54. '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
  55. '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
  56. '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
  57. ]
  58. # compile the re's in the list and store them in-place.
  59. for i in range(len(emparse_list_list)):
  60. x = emparse_list_list[i]
  61. if type(x) is type(''):
  62. x = re.compile(x, re.MULTILINE)
  63. else:
  64. xl = []
  65. for x in x:
  66. xl.append(re.compile(x, re.MULTILINE))
  67. x = tuple(xl)
  68. del xl
  69. emparse_list_list[i] = x
  70. del x
  71. del i
  72. # list of re's used to find reasons (error messages).
  73. # if a string, "<>" is replaced by a copy of the email address.
  74. # The expressions are searched for in order. After the first match,
  75. # no more expressions are searched for. So, order is important.
  76. emparse_list_reason = [
  77. r'^5\d{2} <>\.\.\. (?P<reason>.*)',
  78. '<>\.\.\. (?P<reason>.*)',
  79. re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
  80. re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
  81. re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
  82. ]
  83. emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
  84. def emparse_list(fp, sub):
  85. data = fp.read()
  86. res = emparse_list_from.search(data)
  87. if res is None:
  88. from_index = len(data)
  89. else:
  90. from_index = res.start(0)
  91. errors = []
  92. emails = []
  93. reason = None
  94. for regexp in emparse_list_list:
  95. if type(regexp) is type(()):
  96. res = regexp[0].search(data, 0, from_index)
  97. if res is not None:
  98. try:
  99. reason = res.group('reason')
  100. except IndexError:
  101. pass
  102. while 1:
  103. res = regexp[1].match(data, res.end(0), from_index)
  104. if res is None:
  105. break
  106. emails.append(res.group('email'))
  107. break
  108. else:
  109. res = regexp.search(data, 0, from_index)
  110. if res is not None:
  111. emails.append(res.group('email'))
  112. try:
  113. reason = res.group('reason')
  114. except IndexError:
  115. pass
  116. break
  117. if not emails:
  118. raise Unparseable
  119. if not reason:
  120. reason = sub
  121. if reason[:15] == 'returned mail: ':
  122. reason = reason[15:]
  123. for regexp in emparse_list_reason:
  124. if type(regexp) is type(''):
  125. for i in range(len(emails)-1,-1,-1):
  126. email = emails[i]
  127. exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
  128. res = exp.search(data)
  129. if res is not None:
  130. errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
  131. del emails[i]
  132. continue
  133. res = regexp.search(data)
  134. if res is not None:
  135. reason = res.group('reason')
  136. break
  137. for email in emails:
  138. errors.append(' '.join((email.strip()+': '+reason).split()))
  139. return errors
  140. EMPARSERS = [emparse_list, ]
  141. def sort_numeric(a, b):
  142. a = int(a)
  143. b = int(b)
  144. if a < b: return -1
  145. elif a > b: return 1
  146. else: return 0
  147. def parsedir(dir, modify):
  148. os.chdir(dir)
  149. pat = re.compile('^[0-9]*$')
  150. errordict = {}
  151. errorfirst = {}
  152. errorlast = {}
  153. nok = nwarn = nbad = 0
  154. # find all numeric file names and sort them
  155. files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
  156. files.sort(sort_numeric)
  157. for fn in files:
  158. # Lets try to parse the file.
  159. fp = open(fn)
  160. m = ErrorMessage(fp)
  161. sender = m.getaddr('From')
  162. print '%s\t%-40s\t'%(fn, sender[1]),
  163. if m.is_warning():
  164. fp.close()
  165. print 'warning only'
  166. nwarn = nwarn + 1
  167. if modify:
  168. os.rename(fn, ','+fn)
  169. ## os.unlink(fn)
  170. continue
  171. try:
  172. errors = m.get_errors()
  173. except Unparseable:
  174. print '** Not parseable'
  175. nbad = nbad + 1
  176. fp.close()
  177. continue
  178. print len(errors), 'errors'
  179. # Remember them
  180. for e in errors:
  181. try:
  182. mm, dd = m.getdate('date')[1:1+2]
  183. date = '%s %02d' % (calendar.month_abbr[mm], dd)
  184. except:
  185. date = '??????'
  186. if not errordict.has_key(e):
  187. errordict[e] = 1
  188. errorfirst[e] = '%s (%s)' % (fn, date)
  189. else:
  190. errordict[e] = errordict[e] + 1
  191. errorlast[e] = '%s (%s)' % (fn, date)
  192. fp.close()
  193. nok = nok + 1
  194. if modify:
  195. os.rename(fn, ','+fn)
  196. ## os.unlink(fn)
  197. print '--------------'
  198. print nok, 'files parsed,',nwarn,'files warning-only,',
  199. print nbad,'files unparseable'
  200. print '--------------'
  201. list = []
  202. for e in errordict.keys():
  203. list.append((errordict[e], errorfirst[e], errorlast[e], e))
  204. list.sort()
  205. for num, first, last, e in list:
  206. print '%d %s - %s\t%s' % (num, first, last, e)
  207. def main():
  208. modify = 0
  209. if len(sys.argv) > 1 and sys.argv[1] == '-d':
  210. modify = 1
  211. del sys.argv[1]
  212. if len(sys.argv) > 1:
  213. for folder in sys.argv[1:]:
  214. parsedir(folder, modify)
  215. else:
  216. parsedir('/ufs/jack/Mail/errorsinbox', modify)
  217. if __name__ == '__main__' or sys.argv[0] == __name__:
  218. main()