blob: 5ebe18b9cbd93860b441f66eb551ff503d4c4f3a [file] [log] [blame]
Sjoerd Mullender89686241995-10-19 09:35:31 +00001"""mailerdaemon - classes to parse mailer-daemon messages"""
2
3import string
4import rfc822
Guido van Rossum8e7a54f1996-07-21 02:50:30 +00005import calendar
Guido van Rossume33bba81998-10-09 13:27:49 +00006import re
Sjoerd Mullender89686241995-10-19 09:35:31 +00007import os
8import sys
9
class Unparseable(Exception):
    """Raised when no parser recognises the layout of a bounce message.

    This used to be a string exception; string exceptions cannot be
    raised on Python 2.6 and later.  A class keeps the existing
    ``raise Unparseable`` / ``except Unparseable`` sites working.
    """
    pass
11
class ErrorMessage(rfc822.Message):
    """A mailer-daemon message read from an open file.

    The headers are parsed by rfc822.Message; the body is handed to the
    parser functions listed in EMPARSERS by get_errors().
    """

    def __init__(self, fp):
        rfc822.Message.__init__(self, fp)
        # Lower-cased Subject of a non-warning message; stored by
        # is_warning() and used by get_errors() as the fallback reason.
        self.sub = ''

    def _lowered_subject(self):
        """Return the Subject header in lower case, or '' if absent/empty."""
        sub = self.getheader('Subject')
        if not sub:
            return ''
        return string.lower(sub)

    def is_warning(self):
        """Return 1 if the Subject marks this as a mere warning, else 0.

        A subject starting with 'waiting mail' or containing 'warning'
        (case-insensitive) counts as a warning.  For any other non-empty
        subject the lower-cased text is remembered in self.sub.
        """
        sub = self._lowered_subject()
        if not sub:
            return 0
        if sub[:12] == 'waiting mail':
            return 1
        if string.find(sub, 'warning') >= 0:
            return 1
        self.sub = sub
        return 0

    def get_errors(self):
        """Return the list of error strings produced by the first parser
        in EMPARSERS that understands the body.

        Raises Unparseable when every parser gives up.
        """
        sub = self.sub
        if not sub:
            # is_warning() has not stored a subject (for instance because
            # it was never called); read the header directly so the
            # parsers still get a usable fallback reason.
            sub = self._lowered_subject()
        for p in EMPARSERS:
            # Each parser must see the body from its beginning.
            self.rewindbody()
            try:
                return p(self.fp, sub)
            except Unparseable:
                pass
        raise Unparseable
Sjoerd Mullender89686241995-10-19 09:35:31 +000035
# Table of address-finding patterns.  Each entry is either a single
# regular expression or a tuple of two regular expressions.
#
# A single expression must contain a group (?P<email>...) matching the
# e-mail address.  It may also contain a group (?P<reason>...) matching
# the reason (error message); when no reason is found this way, the
# emparse_list_reason list is consulted instead.
#
# In a tuple, the first expression locates a position in the message and
# the second is then applied one or more times to collect several
# addresses.  The second expression is matched (not searched) starting
# where the previous match ended.
#
# All expressions are compiled with the re module, right below the table.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]


def _compile_entry(entry):
    # A plain string becomes one compiled pattern; anything else is
    # treated as a sequence of strings and becomes a tuple of patterns.
    if type(entry) is type(''):
        return re.compile(entry, re.MULTILINE)
    compiled = []
    for pattern in entry:
        compiled.append(re.compile(pattern, re.MULTILINE))
    return tuple(compiled)

# Swap the source strings for their compiled forms, keeping the same
# list object, and leave no helper name behind in the module.
emparse_list_list[:] = map(_compile_entry, emparse_list_list)
del _compile_entry
Sjoerd Mullender89686241995-10-19 09:35:31 +000079
# Patterns used to find a reason (error message) when the address
# pattern did not supply one.
# An entry that is a plain string is a template: every "<>" in it is
# replaced by a copy of the e-mail address before it is compiled.
# Entries are tried in order and the search stops at the first match,
# so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]

# Finds the first line starting with "From:" (any letter case).
emparse_list_from = re.compile('^From:', re.MULTILINE | re.IGNORECASE)
def _error_line(address, reason):
    # "address: reason" with surrounding whitespace stripped from the
    # address and every run of whitespace collapsed to a single space.
    return string.join(string.split(string.strip(address) + ': ' + reason))


def emparse_list(fp, sub):
    """Extract "address: reason" strings from a bounce message body.

    fp  -- file object; everything it yields from read() is scanned.
    sub -- subject text used as the reason when the body supplies none
           (a leading 'returned mail: ' is dropped).

    Addresses are looked for with emparse_list_list, only in the text
    before the first "From:" line.  Raises Unparseable when no address
    is found.
    """
    data = fp.read()

    # Address patterns are restricted to the text before the first
    # "From:" line, or to the whole text when there is no such line.
    from_line = emparse_list_from.search(data)
    if from_line is not None:
        limit = from_line.start(0)
    else:
        limit = len(data)

    emails = []
    reason = None
    for pattern in emparse_list_list:
        if type(pattern) is type(()):
            # (locator, address) pair: find the spot, then match the
            # address pattern repeatedly from where the last match ended.
            hit = pattern[0].search(data, 0, limit)
            if hit is None:
                continue
            try:
                reason = hit.group('reason')
            except IndexError:
                # the locator pattern has no 'reason' group
                pass
            hit = pattern[1].match(data, hit.end(0), limit)
            while hit is not None:
                emails.append(hit.group('email'))
                hit = pattern[1].match(data, hit.end(0), limit)
            break
        hit = pattern.search(data, 0, limit)
        if hit is None:
            continue
        emails.append(hit.group('email'))
        try:
            reason = hit.group('reason')
        except IndexError:
            # this pattern has no 'reason' group
            pass
        break

    if not emails:
        raise Unparseable

    errors = []
    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if type(pattern) is not type(''):
                # Compiled pattern: one reason shared by all addresses
                # that are still unexplained; stop at the first match.
                hit = pattern.search(data)
                if hit is not None:
                    reason = hit.group('reason')
                    break
                continue
            # Template: substitute each remaining address for "<>".
            # Walk backwards so deleting an entry does not disturb the
            # indices still to be visited.
            pieces = string.split(pattern, '<>')
            index = len(emails)
            while index > 0:
                index = index - 1
                address = emails[index]
                specific = re.compile(string.join(pieces, re.escape(address)),
                                      re.MULTILINE)
                hit = specific.search(data)
                if hit is not None:
                    errors.append(_error_line(address, hit.group('reason')))
                    del emails[index]

    for address in emails:
        errors.append(_error_line(address, reason))
    return errors


# Parser functions tried, in order, by ErrorMessage.get_errors().
EMPARSERS = [emparse_list, ]
Guido van Rossum8e7a54f1996-07-21 02:50:30 +0000150
def sort_numeric(a, b):
    """Comparison function ordering two numeric strings by integer value.

    Returns -1, 0 or 1 when int(a) is less than, equal to or greater
    than int(b).  Uses the builtin int() rather than the deprecated
    string.atoi().
    """
    left = int(a)
    right = int(b)
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
Sjoerd Mullender89686241995-10-19 09:35:31 +0000157
def _short_date(m):
    # Month abbreviation and day of the message's Date header, or
    # '??????' when the header is missing or cannot be used.
    try:
        mm, dd = m.getdate('date')[1:1+2]
        return '%s %02d' % (calendar.month_abbr[mm], dd)
    except (TypeError, ValueError, IndexError):
        # getdate() gave None (slicing fails) or an unusable tuple
        return '??????'


def parsedir(dir, modify):
    """Parse every numerically named file in dir and print a report.

    dir    -- directory to scan; it becomes the current working directory.
    modify -- when true, each file recognised as a warning or parsed
              successfully is renamed to ',' + name.  Unparseable files
              are left alone.

    One line is written to standard output per file, followed by totals
    and a sorted list of distinct errors with their count and the first
    and last file (and date) in which each was seen.
    """
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> 'file (date)' of first occurrence
    errorlast = {}      # error text -> 'file (date)' of last occurrence
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    files = []
    for name in os.listdir('.'):
        if pat.match(name) is not None:
            files.append(name)
    files.sort(sort_numeric)

    for fn in files:
        # Let's try to parse the file.  The file is always closed before
        # it is renamed, even if parsing raises something unexpected.
        fp = open(fn)
        try:
            m = ErrorMessage(fp)
            sender = m.getaddr('From')
            sys.stdout.write('%s\t%-40s\t' % (fn, sender[1]))
            warning = m.is_warning()
            errors = None
            if not warning:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    pass
        finally:
            fp.close()

        if warning:
            sys.stdout.write('warning only\n')
            nwarn = nwarn + 1
        elif errors is None:
            sys.stdout.write('** Not parseable\n')
            nbad = nbad + 1
            continue
        else:
            sys.stdout.write('%d errors\n' % len(errors))
            # Remember them.  The stamp is the same for every error of
            # this file, so it is built once.
            stamp = '%s (%s)' % (fn, _short_date(m))
            for e in errors:
                seen = errordict.get(e, 0)
                if seen == 0:
                    errorfirst[e] = stamp
                errordict[e] = seen + 1
                errorlast[e] = stamp
            nok = nok + 1

        if modify:
            os.rename(fn, ','+fn)

    sys.stdout.write('--------------\n')
    sys.stdout.write('%d files parsed, %d files warning-only, '
                     '%d files unparseable\n' % (nok, nwarn, nbad))
    sys.stdout.write('--------------\n')
    report = []
    for e in errordict.keys():
        # append() takes exactly one argument: pass the record as a tuple
        report.append((errordict[e], errorfirst[e], errorlast[e], e))
    report.sort()
    for num, first, last, e in report:
        sys.stdout.write('%d %s - %s\t%s\n' % (num, first, last, e))
Sjoerd Mullender89686241995-10-19 09:35:31 +0000225
def main():
    """Command-line entry point: [-d] [folder ...].

    -d enables renaming of processed files (it is removed from sys.argv).
    Without folder arguments the built-in default folder is used.
    """
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]

    # parsedir() changes the working directory, so relative folder names
    # must be resolved before the first call; otherwise a second relative
    # folder would be looked up inside the first one.
    folders = []
    for folder in sys.argv[1:]:
        folders.append(os.path.abspath(folder))
    if not folders:
        folders.append('/ufs/jack/Mail/errorsinbox')

    for folder in folders:
        parsedir(folder, modify)

if __name__ == '__main__' or sys.argv[0] == __name__:
    main()