blob: 85f90aa611f1d6495e955917658408c1abf50bab [file] [log] [blame]
Sjoerd Mullender89686241995-10-19 09:35:31 +00001"""mailerdaemon - classes to parse mailer-daemon messages"""
2
Sjoerd Mullender89686241995-10-19 09:35:31 +00003import rfc822
Guido van Rossum8e7a54f1996-07-21 02:50:30 +00004import calendar
Guido van Rossume33bba81998-10-09 13:27:49 +00005import re
Sjoerd Mullender89686241995-10-19 09:35:31 +00006import os
7import sys
8
class Unparseable(Exception):
    """Raised when a message cannot be parsed as a mailer-daemon report.

    This used to be a string exception ('mailerdaemon.Unparseable').
    String exceptions can no longer be raised by Python 2.6 and later,
    so the same name is now bound to a real exception class; existing
    ``raise Unparseable`` and ``except Unparseable:`` code keeps working.
    """
10
class ErrorMessage(rfc822.Message):
    """An rfc822 message that is expected to be a mailer-daemon report.

    Adds two operations on top of rfc822.Message: classifying the
    message as a mere warning (is_warning) and extracting the list of
    delivery errors it reports (get_errors).
    """

    def __init__(self, fp):
        """Parse the headers of the message read from the file object fp."""
        rfc822.Message.__init__(self, fp)
        # Lower-cased subject; filled in by is_warning() for messages
        # that are not warnings, and handed to the parsers as the
        # fallback reason text.
        self.sub = ''

    def _lowered_subject(self):
        """Return the Subject header in lower case ('' if there is none)."""
        return (self.getheader('Subject') or '').lower()

    def is_warning(self):
        """Return 1 if the subject marks this as a warning, else 0.

        A message counts as a warning when its lower-cased subject
        starts with 'waiting mail' or contains 'warning'.  For any other
        non-empty subject the lower-cased subject is remembered in
        self.sub.
        """
        sub = self._lowered_subject()
        if not sub:
            return 0
        if sub.startswith('waiting mail') or 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def get_errors(self):
        """Return the errors found by the first parser that succeeds.

        Each parser in EMPARSERS is called with the message body
        (rewound to its start) and the lower-cased subject.  Raises
        Unparseable when every parser raises Unparseable.
        """
        # Previously the subject was only available if is_warning() had
        # been called first (and had returned 0); otherwise the parsers
        # were silently given ''.  Fall back to the header itself so the
        # result does not depend on call order.
        sub = self.sub or self._lowered_subject()
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                return parser(self.fp, sub)
            except Unparseable:
                pass
        raise Unparseable
Sjoerd Mullender89686241995-10-19 09:35:31 +000034
# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address.  The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's.  The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses.  The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
# NOTE(review): this listing had lost its original indentation when it
# was captured; runs of spaces inside the patterns below may have been
# collapsed as well.  The strings are reproduced exactly as found --
# verify them against the upstream file before relying on them.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]


def _compile_entry(entry):
    """Compile one table entry: a pattern string or a sequence of them."""
    if isinstance(entry, str):
        return re.compile(entry, re.MULTILINE)
    return tuple([re.compile(pattern, re.MULTILINE) for pattern in entry])


# compile the re's in the list and store them in-place.
emparse_list_list[:] = list(map(_compile_entry, emparse_list_list))

# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)


def _named_group(match, name):
    """Return match.group(name), or None if the pattern has no such group."""
    if name in match.re.groupindex:
        return match.group(name)
    return None


def _format_error(email, reason):
    """Return 'email: reason' with all whitespace runs collapsed to one space."""
    return ' '.join((email.strip() + ': ' + reason).split())


def emparse_list(fp, sub):
    """Extract the delivery errors reported in a mailer-daemon message.

    fp  -- file object positioned at the start of the message body; it
           is read to the end.
    sub -- the message subject, used as the reason text when the body
           supplies none (a leading 'returned mail: ' is dropped).

    Returns a list of 'address: reason' strings, one per failed address.
    Raises Unparseable when no rule in emparse_list_list yields an
    address.
    """
    data = fp.read()
    # Addresses are only looked for before the first line that starts
    # with "From:" (presumably where the returned original message
    # begins -- the code only shows the cut-off, not the rationale).
    res = emparse_list_from.search(data)
    if res is None:
        from_index = len(data)
    else:
        from_index = res.start(0)

    emails = []
    reason = None
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            header, address = regexp[0], regexp[1]
            res = header.search(data, 0, from_index)
            if res is None:
                continue
            # The address pattern is matched repeatedly, each time at
            # the position where the previous match ended.
            found = []
            pos = res.end(0)
            while 1:
                hit = address.match(data, pos, from_index)
                if hit is None:
                    break
                found.append(hit.group('email'))
                pos = hit.end(0)
            if not found:
                # The header was present but no address followed it.
                # This used to stop the search and make the message
                # unparseable; give the remaining rules a chance.
                continue
            emails = found
        else:
            res = regexp.search(data, 0, from_index)
            if res is None:
                continue
            emails = [res.group('email')]
        reason = _named_group(res, 'reason')
        break
    if not emails:
        raise Unparseable

    errors = []
    if not reason:
        reason = sub or ''
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if isinstance(regexp, str):
                # Per-address template: addresses for which a specific
                # reason is found are reported right away and dropped
                # from the list.  Walk backwards, as the original index
                # loop did, so the reporting order is unchanged.
                unresolved = []
                for email in reversed(emails):
                    exp = re.compile(re.escape(email).join(regexp.split('<>')),
                                     re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(_format_error(email, res.group('reason')))
                    else:
                        unresolved.append(email)
                unresolved.reverse()
                emails = unresolved
                continue
            # Message-wide reason: the first match wins.
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break
    for email in emails:
        errors.append(_format_error(email, reason))
    return errors


EMPARSERS = [emparse_list, ]
Guido van Rossum8e7a54f1996-07-21 02:50:30 +0000149
def sort_numeric(a, b):
    """Compare a and b by their integer value, cmp()-style.

    Both arguments are converted with int().  Returns -1 if a sorts
    before b, 1 if it sorts after, and 0 if the two values are equal.
    """
    left = int(a)
    right = int(b)
    # Each comparison is a bool (0 or 1), so the difference is -1, 0 or 1.
    return (left > right) - (left < right)
Sjoerd Mullender89686241995-10-19 09:35:31 +0000156
def _message_date(m):
    """Return the message's Date header as 'Mon DD', or '??????' if unusable."""
    try:
        mm, dd = m.getdate('date')[1:1+2]
        return '%s %02d' % (calendar.month_abbr[mm], dd)
    except Exception:
        # Best effort: a missing or malformed date must not stop the run.
        return '??????'


def parsedir(dir, modify):
    """Parse every numerically named file in dir and print a summary.

    dir    -- directory holding one message per file; only files whose
              names consist of digits are looked at, in numeric order.
    modify -- if true, every file that was a warning or was parsed
              successfully is renamed to ','+name in the same directory.

    One status line is printed per file, followed by the totals and a
    table of the distinct errors with their count and the first and
    last file (and date) in which each was seen.

    Unlike earlier versions this does not change the process's current
    directory, so several relative directories can be processed one
    after another.
    """
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    files = sorted([fn for fn in os.listdir(dir) if pat.match(fn) is not None],
                   key=int)

    for fn in files:
        # Lets try to parse the file.
        path = os.path.join(dir, fn)
        done = 0
        fp = open(path)
        try:
            m = ErrorMessage(fp)
            sender = m.getaddr('From')
            # No newline: the verdict for this file follows on the same line.
            sys.stdout.write('%s\t%-40s\t' % (fn, sender[1]))

            if m.is_warning():
                print('warning only')
                nwarn = nwarn + 1
                done = 1
            else:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    print('** Not parseable')
                    nbad = nbad + 1
                else:
                    print('%d errors' % len(errors))

                    # Remember them
                    where = '%s (%s)' % (fn, _message_date(m))
                    for e in errors:
                        if e not in errordict:
                            errordict[e] = 1
                            errorfirst[e] = where
                        else:
                            errordict[e] = errordict[e] + 1
                        errorlast[e] = where
                    nok = nok + 1
                    done = 1
        finally:
            # Close the file even if parsing blew up, and always before
            # the rename below.
            fp.close()

        if done and modify:
            os.rename(path, os.path.join(dir, ',' + fn))

    print('--------------')
    print('%s files parsed, %s files warning-only, %s files unparseable'
          % (nok, nwarn, nbad))
    print('--------------')
    report = sorted([(errordict[e], errorfirst[e], errorlast[e], e)
                     for e in errordict])
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))
Sjoerd Mullender89686241995-10-19 09:35:31 +0000224
def main():
    """Command-line entry point: [-d] [folder ...].

    A leading '-d' switches on modify mode and is removed from
    sys.argv.  Every remaining argument is handed to parsedir(); with
    no arguments the built-in default folder is used.
    """
    argv = sys.argv
    modify = 0
    if argv[1:2] == ['-d']:
        modify = 1
        del argv[1]
    folders = argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)


# NOTE(review): besides the usual script check this also runs main()
# when sys.argv[0] equals the module name; why is not visible here.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()