Python | 246 lines | 202 code | 21 blank | 23 comment | 54 complexity | dbf4dbe23247e52b6e0484b7afda82cf MD5 | raw file
- #!/usr/bin/env python3
- """Classes to parse mailer-daemon messages."""
- import calendar
- import email.message
- import re
- import os
- import sys
class Unparseable(Exception):
    """Raised when a message body matches none of the known bounce formats."""
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can report the delivery errors it holds.

    Instances are normally created by the email parser, e.g.
    ``email.message_from_file(fp, _class=ErrorMessage)``.
    """

    def __init__(self):
        super().__init__()
        # Lower-cased subject; filled in by is_warning() for non-warning
        # messages and passed to the parsers as a fallback reason.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks this as a mere warning, else 0.

        As a side effect the lower-cased subject of a non-warning message
        is remembered in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def _body_text(self):
        """Return the message text that follows the top-level headers.

        The text is regenerated with as_string() so that the headers of
        nested parts remain visible to the parsers as plain text lines.
        """
        text = self.as_string()
        if text.startswith('\n'):
            # No top-level headers at all: only the separator line precedes
            # the body.
            return text[1:]
        # Headers never contain an empty line, so the first one ends them.
        return text.partition('\n\n')[2]

    def get_errors(self):
        """Return the list of errors found by the first parser that succeeds.

        Each parser in EMPARSERS is called with a file-like object positioned
        at the start of the body and with ``self.sub``.

        Raises Unparseable if every parser raises Unparseable.
        """
        # email.message.Message has neither rewindbody() nor an fp attribute
        # (those belonged to the old rfc822 API), so give every parser its own
        # fresh in-memory file over the body text instead.
        import io

        body = self._body_text()
        for parser in EMPARSERS:
            try:
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
# Patterns used to locate the failing address(es) in a message.
# Each entry is either a single pattern or a 2-tuple of patterns.
# A single pattern must contain a group (?P<email>...) for the email
# address and may contain a group (?P<reason>...) for the reason (error
# message).  When no reason is captured, the emparse_list_reason list is
# consulted instead.
# In a 2-tuple the first pattern finds a location and the second one is
# applied repeatedly to collect several addresses; it is matched (not
# searched) at the position where the previous match ended.
# All patterns are compiled with the re module further down.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]
# Swap every entry for its compiled form, keeping the same list object.
# Strings become one MULTILINE pattern; tuples become tuples of MULTILINE
# patterns.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
# Patterns used to find a reason (error message) when the address pattern
# did not capture one.
# A plain string is a template: each "<>" in it is replaced by a copy of
# the email address before it is compiled.
# The expressions are tried in order and the search stops after the first
# compiled expression that matches, so order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', flags=re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', flags=re.MULTILINE),
]
# Matches the start of a "From:" line in any letter case; emparse_list only
# looks for addresses in the text that precedes the first such line.
emparse_list_from = re.compile('^From:', flags=re.MULTILINE | re.IGNORECASE)
def emparse_list(fp, sub):
    """Extract 'address: reason' strings from the body of a bounce message.

    fp is an object whose read() returns the body text.  sub is the subject
    line, used as the reason when nothing better is found (a leading
    'returned mail: ' is dropped).

    The patterns of emparse_list_list are tried in order and the first one
    that matches supplies the addresses.  If it supplies no reason, the
    patterns of emparse_list_reason are consulted.

    Returns a list of strings with all whitespace runs collapsed to single
    blanks.  Raises Unparseable when no address is found.
    """
    data = fp.read()
    # Addresses are only looked for in front of the first "From:" line.
    quoted = emparse_list_from.search(data)
    from_index = len(data) if quoted is None else quoted.start(0)

    def describe(addr, why):
        # 'addr: why' with every run of whitespace reduced to one blank.
        return ' '.join((addr.strip() + ': ' + why).split())

    errors = []
    emails = []
    reason = None
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            found = regexp[0].search(data, 0, from_index)
            if found is None:
                continue
            if 'reason' in found.re.groupindex:
                reason = found.group('reason')
            # Collect consecutive addresses, each match starting exactly
            # where the previous one ended.
            item = regexp[1].match(data, found.end(0), from_index)
            while item is not None:
                emails.append(item.group('email'))
                item = regexp[1].match(data, item.end(0), from_index)
            break
        found = regexp.search(data, 0, from_index)
        if found is not None:
            emails.append(found.group('email'))
            if 'reason' in found.re.groupindex:
                reason = found.group('reason')
            break
    if not emails:
        raise Unparseable
    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if not isinstance(regexp, str):
                # A compiled pattern gives one reason for all addresses
                # that are still pending, and ends the search.
                found = regexp.search(data)
                if found is not None:
                    reason = found.group('reason')
                    break
                continue
            # A template is tried per address; walk backwards so that
            # deleting a handled address keeps the other indices valid.
            for idx in reversed(range(len(emails))):
                addr = emails[idx]
                exp = re.compile(regexp.replace('<>', re.escape(addr)), re.MULTILINE)
                found = exp.search(data)
                if found is not None:
                    errors.append(describe(addr, found.group('reason')))
                    del emails[idx]
    errors.extend(describe(addr, reason) for addr in emails)
    return errors


# The parsers tried, in order, by ErrorMessage.get_errors().
EMPARSERS = [emparse_list]
def sort_numeric(a, b):
    """Compare a and b by their integer values, old cmp-function style.

    Both arguments are converted with int().  Returns -1 if a is smaller,
    1 if a is larger and 0 if they are equal.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
def parsedir(dir, modify):
    """Parse all numerically named message files in dir and print a report.

    Every file whose name consists of digits only is read as an
    ErrorMessage, in numeric order.  For each file one status line is
    printed; afterwards the totals and a list of all distinct errors (with
    their count and the first and last file they occurred in) are printed.

    If modify is true, files that were handled (warnings and successfully
    parsed messages) are renamed by prefixing a comma to their name.

    The current working directory is left alone, so several relative
    folders can be processed one after another.
    """
    from email.utils import parseaddr, parsedate

    numeric = re.compile('[0-9]+')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0
    # find all numeric file names and sort them by value, not by text
    files = sorted((fn for fn in os.listdir(dir) if numeric.fullmatch(fn)),
                   key=int)
    for fn in files:
        path = os.path.join(dir, fn)
        # Lets try to parse the file.  Undecodable bytes are replaced so
        # that one odd message cannot abort the whole run.
        with open(path, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t' % (fn, sender[1]), end=' ')
        if m.is_warning():
            print('warning only')
            nwarn += 1
            if modify:
                os.rename(path, os.path.join(dir, ',' + fn))
            continue
        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad += 1
            continue
        print(len(errors), 'errors')
        # parsedate() gives None for a missing or malformed Date header.
        parsed = parsedate(str(m.get('Date', '')))
        if parsed is None:
            date = '??????'
        else:
            mm, dd = parsed[1:3]
            date = '%s %02d' % (calendar.month_abbr[mm], dd)
        stamp = '%s (%s)' % (fn, date)
        # Remember them
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = stamp
            else:
                errordict[e] += 1
            errorlast[e] = stamp
        nok += 1
        if modify:
            os.rename(path, os.path.join(dir, ',' + fn))

    print('--------------')
    print(nok, 'files parsed,', nwarn, 'files warning-only,', end=' ')
    print(nbad, 'files unparseable')
    print('--------------')
    report = sorted((errordict[e], errorfirst[e], errorlast[e], e)
                    for e in errordict)
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))
def main():
    """Run parsedir() on the folders named on the command line.

    A leading '-d' option switches on modify mode and is removed from
    sys.argv.  Without folder arguments a built-in default folder is used.
    """
    modify = 0
    if sys.argv[1:2] == ['-d']:
        modify = 1
        del sys.argv[1]
    folders = sys.argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)


# Run as a script; also run when sys.argv[0] equals this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()