Éric Araujo | a0e92a8 | 2011-07-26 18:01:08 +0200 | [diff] [blame^] | 1 | #!/usr/bin/env python3 |
Éric Araujo | 1ce7b17 | 2011-07-26 17:36:19 +0200 | [diff] [blame] | 2 | """Classes to parse mailer-daemon messages.""" |
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 3 | |
import calendar
import email.message
import email.utils
import io
import os
import re
import sys
| 9 | |
Éric Araujo | 1ce7b17 | 2011-07-26 17:36:19 +0200 | [diff] [blame] | 10 | |
class Unparseable(Exception):
    """Raised when no delivery-failure information can be extracted."""
| 13 | |
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 14 | |
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can be classified and mined for errors.

    The ``sub`` attribute starts out as ``''``.  is_warning() stores the
    lower-cased subject there when the message is *not* a warning, and
    get_errors() passes it to the parsers as the fallback failure reason,
    so call is_warning() before get_errors().
    """

    def __init__(self, *args, **kwargs):
        # Forward all arguments so the email parser can pass ``policy=``
        # when it instantiates this class through ``_class=ErrorMessage``.
        super().__init__(*args, **kwargs)
        self.sub = ''

    def is_warning(self):
        """Return true if the subject marks this message as a mere warning.

        A message without a subject is not a warning.  As a side effect the
        lower-cased subject of a non-warning message is saved in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return False
        # str() also copes with a Header object returned for a raw
        # non-ASCII subject.
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return True
        self.sub = sub
        return False

    def _body_text(self):
        """Return the message body as one string for the text parsers."""
        payload = self.get_payload()
        if payload is None:
            return ''
        if isinstance(payload, str):
            return payload
        # Multipart message: the payload is a list of sub-messages.  Flatten
        # each part (headers included) so that header-like lines inside the
        # parts are visible to the regular expressions.
        return '\n'.join(
            part.as_string() if isinstance(part, email.message.Message)
            else str(part)
            for part in payload
        )

    def get_errors(self):
        """Return the list of error strings found by the first parser that succeeds.

        Each parser in EMPARSERS is called with a file-like object positioned
        at the start of the body and with ``self.sub``.

        Raises:
            Unparseable: if every parser gives up on the message.
        """
        body = self._body_text()
        for parser in EMPARSERS:
            try:
                # email.message.Message keeps no file object to rewind, so
                # every parser gets its own fresh in-memory file.
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 40 | |
# Patterns that locate the failing address(es) in a bounce message.
# Each entry is either a single pattern or a 2-tuple of patterns.
#  * A single pattern must define the group (?P<email>...) for the address
#    and may define (?P<reason>...) for the error text.  When no reason is
#    captured, emparse_list_reason is consulted instead.
#  * In a 2-tuple the first pattern finds the start of an address list and
#    the second is matched (not searched) repeatedly, each time starting
#    where the previous match ended, to collect one address per match.
# The strings are replaced by compiled re objects right below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Compile every pattern with re.MULTILINE, keeping the same list object;
# tuple entries stay tuples of compiled patterns.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 84 | |
# Patterns used to find the reason (error message) when the address
# pattern did not capture one.
# A plain string is a template: every "<>" in it is replaced by the
# (escaped) email address before it is compiled and searched for.
# The entries are tried in order and the search stops at the first
# compiled pattern that matches, so order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    # Raw string: "\." is not a valid escape in a normal string literal.
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Finds the first "From:" line in the body; emparse_list() only looks for
# addresses in the text before that line.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Extract "address: reason" strings from the body read from *fp*.

    Address patterns from emparse_list_list are only applied to the text
    before the first "From:" line.  *sub* is the fallback reason; a leading
    'returned mail: ' is stripped from it.

    Returns a list of whitespace-normalised "email: reason" strings.
    Raises Unparseable when no address can be found.
    """
    data = fp.read()
    from_match = emparse_list_from.search(data)
    from_index = len(data) if from_match is None else from_match.start(0)

    errors = []
    emails = []
    reason = None

    # Phase 1: the first pattern that matches supplies the addresses (and
    # possibly the reason); later patterns are not tried.
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            found = pattern[0].search(data, 0, from_index)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # This pattern defines no "reason" group.
                pass
            # Collect consecutive addresses, each match starting where the
            # previous one stopped.
            while True:
                found = pattern[1].match(data, found.end(0), from_index)
                if found is None:
                    break
                emails.append(found.group('email'))
            break
        found = pattern.search(data, 0, from_index)
        if found is None:
            continue
        emails.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            pass
        break

    if not emails:
        raise Unparseable

    # Phase 2: no reason captured yet, so start from the subject and try the
    # dedicated reason patterns.
    if not reason:
        reason = sub
        if reason.startswith('returned mail: '):
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                pieces = pattern.split('<>')
                # Walk backwards so deleting an entry does not disturb the
                # indices still to be visited.
                for idx in reversed(range(len(emails))):
                    address = emails[idx]
                    specific = re.compile(re.escape(address).join(pieces), re.MULTILINE)
                    hit = specific.search(data)
                    if hit is not None:
                        errors.append(' '.join((address.strip()+': '+hit.group('reason')).split()))
                        del emails[idx]
                continue
            hit = pattern.search(data)
            if hit is not None:
                reason = hit.group('reason')
                break

    # Addresses without a reason of their own share the general one.
    errors.extend(
        ' '.join((address.strip()+': '+reason).split()) for address in emails
    )
    return errors

# Parser functions tried in order by ErrorMessage.get_errors().
EMPARSERS = [emparse_list]
Guido van Rossum | 8e7a54f | 1996-07-21 02:50:30 +0000 | [diff] [blame] | 155 | |
def sort_numeric(a, b):
    """Compare *a* and *b* by their integer values, cmp()-style.

    Both arguments are converted with int().  Returns -1 if a < b,
    1 if a > b and 0 if they are numerically equal.
    """
    left, right = int(a), int(b)
    # bool - bool yields a plain int: -1, 0 or 1.
    return (left > right) - (left < right)
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 165 | |
def parsedir(dir, modify):
    """Parse all numerically named message files in *dir* and print a report.

    For every file one status line is printed (warning only, not parseable,
    or the number of errors found), followed by totals and a summary of the
    distinct errors with their count and the first and last file (and date)
    in which they occurred.

    Args:
        dir: folder holding the messages.  The process changes its working
            directory to it and does not change back.
        modify: if true, files that were handled (warnings and successfully
            parsed messages) are renamed to ``','+name``.
    """
    os.chdir(dir)
    pat = re.compile('[0-9]+')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> "file (date)" of first occurrence
    errorlast = {}      # error text -> "file (date)" of last occurrence
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = sorted(
        (fn for fn in os.listdir('.') if pat.fullmatch(fn) is not None),
        key=int,
    )

    for fn in files:
        # Lets try to parse the file.  message_from_file() consumes the whole
        # file, so it can be closed right away (and before any rename).
        # Bounce messages may contain arbitrary 8-bit data, hence "replace".
        with open(fn, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = email.utils.parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn += 1
            if modify:
                os.rename(fn, ','+fn)
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad += 1
            continue
        print(len(errors), 'errors')

        # The date is the same for every error of this message.
        raw_date = m.get('date')
        parsed = email.utils.parsedate(str(raw_date)) if raw_date else None
        if parsed is None:
            date = '??????'
        else:
            date = '%s %02d' % (calendar.month_abbr[parsed[1]], parsed[2])
        stamp = '%s (%s)' % (fn, date)

        # Remember them
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = stamp
            else:
                errordict[e] += 1
            errorlast[e] = stamp

        nok += 1
        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    summary = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict
    )
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
Sjoerd Mullender | 8968624 | 1995-10-19 09:35:31 +0000 | [diff] [blame] | 233 | |
def main():
    """Command-line entry point: ``[-d] [folder ...]``.

    With ``-d`` as the first argument, processed files are renamed (the
    option is removed from sys.argv).  Every remaining argument is a folder
    passed to parsedir(); without any, a built-in default folder is used.
    """
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]
    if len(sys.argv) > 1:
        # parsedir() changes the working directory, so resolve all folder
        # arguments first; otherwise a relative second folder would be
        # looked up inside the first one.
        folders = [os.path.abspath(folder) for folder in sys.argv[1:]]
        for folder in folders:
            parsedir(folder, modify)
    else:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)

if __name__ == '__main__' or sys.argv[0] == __name__:
    main()