blob: 62189f14e9869a124ce42f1e216b27fa12f9e69c [file] [log] [blame]
Éric Araujo1ce7b172011-07-26 17:36:19 +02001"""Classes to parse mailer-daemon messages."""
Sjoerd Mullender89686241995-10-19 09:35:31 +00002
import calendar
import email.message
import email.utils
import io
import os
import re
import sys
8
Éric Araujo1ce7b172011-07-26 17:36:19 +02009
class Unparseable(Exception):
    """Raised when a message cannot be parsed into a list of errors."""
12
Sjoerd Mullender89686241995-10-19 09:35:31 +000013
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can be classified and mined for errors.

    ``sub`` holds the lower-cased subject once is_warning() has decided the
    message is not a mere warning; it is handed to the parsers as a fallback
    reason.
    """

    def __init__(self):
        email.message.Message.__init__(self)
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks this message as a warning, else 0.

        A subject that starts with 'waiting mail' or contains 'warning'
        (case-insensitively) counts as a warning.  Otherwise the lower-cased
        subject is remembered in ``self.sub``.  A missing or empty subject
        yields 0 and leaves ``self.sub`` untouched.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail'):
            return 1
        if 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def _body_text(self):
        """Return the message body as one string for the regex parsers."""
        payload = self.get_payload()
        if payload is None:
            return ''
        if isinstance(payload, str):
            return payload
        # Multipart: flatten each part (its headers and body) so that lines
        # such as "Original-Recipient:" inside sub-parts stay searchable.
        return '\n'.join(part.as_string() for part in payload)

    def get_errors(self):
        """Return the error list produced by the first parser that succeeds.

        Each parser in EMPARSERS is called as ``parser(fp, sub)`` with a
        file-like object positioned at the start of the body.

        Raises Unparseable if every parser raises Unparseable.
        """
        # email.message.Message has no ``fp``/``rewindbody()``; give every
        # parser its own fresh in-memory file over the body instead.
        body = self._body_text()
        for p in EMPARSERS:
            try:
                return p(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
Sjoerd Mullender89686241995-10-19 09:35:31 +000039
# Patterns used to locate failed addresses: each entry is either a single
# re or a tuple of re's.
# A single re should contain at least a group (?P<email>...) which
# should refer to the email address.  It can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# A tuple should contain 2 re's.  The first re finds a location, the
# second re is repeated one or more times to find multiple email
# addresses.  The second re is matched (not searched) where the
# previous match ended.
# The re's are compiled using the re module.
# NOTE(review): the copy of this file under review had its whitespace
# collapsed; runs of spaces inside these patterns may have been lost --
# verify against the upstream source.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Swap every pattern string for its compiled form (MULTILINE), keeping the
# same list object; tuples of strings become tuples of compiled re's.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
Sjoerd Mullender89686241995-10-19 09:35:31 +000083
# list of re's used to find reasons (error messages).
# Entries are either pattern strings or compiled re's.
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
# All pattern strings containing backslash escapes are raw strings so
# that "\." and "\d" reach the re module unchanged.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Matches "From:" (any case) at the start of a line.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Extract 'address: reason' error strings from a bounce message body.

    fp  -- file-like object; its entire remaining content is read.
    sub -- fallback reason, used when the patterns yield none (a leading
           'returned mail: ' is stripped from it).

    Addresses are looked for with the patterns in emparse_list_list, only in
    the text before the first line starting with "From:"; the first pattern
    that hits ends the search.  If that pattern supplied no reason, the
    patterns in emparse_list_reason are searched over the whole text.

    Returns a list of whitespace-normalised 'address: reason' strings.
    Raises Unparseable when no address was found.
    """
    data = fp.read()
    sender_line = emparse_list_from.search(data)
    from_index = len(data) if sender_line is None else sender_line.start(0)

    errors = []
    emails = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # (locator, repeater): find the spot, then match addresses
            # back to back starting where the previous match ended.
            found = pattern[0].search(data, 0, from_index)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # this pattern has no 'reason' group
                pass
            while True:
                found = pattern[1].match(data, found.end(0), from_index)
                if found is None:
                    break
                emails.append(found.group('email'))
            break
        found = pattern.search(data, 0, from_index)
        if found is None:
            continue
        emails.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            # this pattern has no 'reason' group
            pass
        break

    if not emails:
        raise Unparseable

    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Per-address pattern: substitute each address for "<>".
                # Walk backwards so deleting an entry keeps the remaining
                # indices valid.
                for idx in reversed(range(len(emails))):
                    address = emails[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(address)),
                        re.MULTILINE)
                    found = specific.search(data)
                    if found is not None:
                        line = address.strip() + ': ' + found.group('reason')
                        errors.append(' '.join(line.split()))
                        del emails[idx]
                continue
            found = pattern.search(data)
            if found is not None:
                reason = found.group('reason')
                break

    # Addresses without a reason of their own share the common one.
    errors.extend(' '.join((address.strip() + ': ' + reason).split())
                  for address in emails)
    return errors
Sjoerd Mullender89686241995-10-19 09:35:31 +0000152
# Parsers tried in order by ErrorMessage.get_errors().  Each one is called
# as parser(fp, sub) and raises Unparseable when it cannot handle the message.
EMPARSERS = [emparse_list]
Guido van Rossum8e7a54f1996-07-21 02:50:30 +0000154
def sort_numeric(a, b):
    """Compare *a* and *b* by integer value, cmp()-style.

    Both arguments are converted with int().  Returns -1, 0 or 1 when a is
    smaller than, equal to, or larger than b.
    """
    left, right = int(a), int(b)
    # bool arithmetic: (True - False) == 1, (False - True) == -1
    return (left > right) - (left < right)
Sjoerd Mullender89686241995-10-19 09:35:31 +0000164
def parsedir(dir, modify):
    """Parse every numerically named message file in *dir* and print a report.

    dir    -- folder holding the message files; only names made up entirely
              of digits are considered, in ascending numeric order.
    modify -- if true, each file that was a warning or was parsed
              successfully is renamed to ',' + name inside *dir*.

    For each file one status line is printed; afterwards a summary and a
    table of distinct errors (count, first and last file/date, error text),
    sorted by count.  Returns None.

    The process working directory is left unchanged, so several relative
    folders can be handled in one run.
    """
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    numeric = re.compile('[0-9]+')
    files = [fn for fn in os.listdir(dir) if numeric.fullmatch(fn)]
    files.sort(key=int)

    for fn in files:
        path = os.path.join(dir, fn)
        # Lets try to parse the file.  The parser reads the whole message,
        # so the file can be closed straight away.
        with open(path) as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = email.utils.parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn = nwarn + 1
            if modify:
                os.rename(path, os.path.join(dir, ','+fn))
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad = nbad + 1
            continue
        print(len(errors), 'errors')

        # The date is the same for every error of this message.
        parsed = email.utils.parsedate(str(m.get('Date', '')))
        if parsed is None:
            date = '??????'
        else:
            mm, dd = parsed[1:3]
            date = '%s %02d' % (calendar.month_abbr[mm], dd)

        # Remember them
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = '%s (%s)' % (fn, date)
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = '%s (%s)' % (fn, date)

        nok = nok + 1
        if modify:
            os.rename(path, os.path.join(dir, ','+fn))

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    report = sorted((errordict[e], errorfirst[e], errorlast[e], e)
                    for e in errordict)
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))
Sjoerd Mullender89686241995-10-19 09:35:31 +0000232
def main():
    """Command-line entry point: report on each folder named in sys.argv.

    A leading '-d' argument turns on modify mode and is removed from
    sys.argv.  Without folder arguments a built-in default folder is used.
    """
    argv = sys.argv  # same list object, so the deletion below is visible
    modify = 0
    if len(argv) > 1 and argv[1] == '-d':
        modify = 1
        del argv[1]
    folders = argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
Sjoerd Mullender89686241995-10-19 09:35:31 +0000243
# Run main() when executed as a script.  The second test also fires when
# sys.argv[0] equals this module's name.
# NOTE(review): the purpose of the sys.argv[0] check is not evident from
# this file -- confirm whether it is still needed.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()