blob: 853949b99564adef3a273ff67ad79d664112fadc [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum62448671996-09-17 21:33:15 +00002
3"""Classes to handle Unix style, MMDF style, and MH style mailboxes."""
4
5
Guido van Rossumc7b68821994-04-28 09:53:33 +00006import rfc822
Jack Jansen97157791995-10-23 13:59:53 +00007import os
Guido van Rossumc7b68821994-04-28 09:53:33 +00008
Barry Warsawffd05ee2002-03-01 22:39:14 +00009__all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox",
10 "PortableUnixMailbox"]
Skip Montanaro17ab1232001-01-24 06:27:27 +000011
class _Mailbox:
    """Common machinery for mailboxes stored in a single file.

    Subclasses provide two hooks that operate on self.fp:

    _search_start() -- leave the file positioned at the start of the next
                       message, or raise EOFError when there is none.
    _search_end()   -- leave the file positioned just past that message.
    """

    def __init__(self, fp, factory=rfc822.Message):
        # fp is the file object holding the mailbox; next() calls its
        # seek() and tell(), so it must support both.
        # factory is called with a _Subfile limited to one message and its
        # result is what next() returns.
        self.fp = fp
        self.seekp = 0  # offset in fp where the next search begins
        self.factory = factory

    def __iter__(self):
        # next() reports exhaustion by returning None, so the
        # (callable, sentinel) form of iter() is used.
        return iter(self.next, None)

    def next(self):
        """Return the next message, or None once the mailbox is exhausted.

        Messages whose start and end offsets coincide (empty messages) are
        skipped.
        """
        while True:
            # Always resume from the remembered offset; the caller may have
            # moved the shared file position while reading a prior message.
            self.fp.seek(self.seekp)
            try:
                self._search_start()
            except EOFError:
                self.seekp = self.fp.tell()
                return None
            start = self.fp.tell()
            self._search_end()
            stop = self.fp.tell()
            self.seekp = stop
            if stop != start:
                return self.factory(_Subfile(self.fp, start, stop))
Guido van Rossumc7b68821994-04-28 09:53:33 +000035
Guido van Rossumc7b68821994-04-28 09:53:33 +000036
class _Subfile:
    """File-like, read-only window onto offsets [start, stop) of fp.

    The window keeps its own position (self.pos, an absolute offset in fp)
    and re-seeks fp before every read, so several windows can share a
    single underlying file object.
    """

    def __init__(self, fp, start, stop):
        self.fp = fp
        self.start = start
        self.stop = stop
        self.pos = self.start

    def _limit(self, length):
        # Largest count a read may request without crossing self.stop.
        # None or a negative length means "as much as is left".
        remaining = self.stop - self.pos
        if length is None or length < 0 or length > remaining:
            return remaining
        return length

    def read(self, length = None):
        """Read up to length characters, never going past the window end."""
        if self.pos >= self.stop:
            return ''
        length = self._limit(length)
        self.fp.seek(self.pos)
        data = self.fp.read(length)
        self.pos = self.fp.tell()
        return data

    def readline(self, length = None):
        """Read one line, or at most length characters of it.

        The request is clamped to what is left in the window, so an
        explicit length can no longer pull in data that lies beyond stop.
        """
        if self.pos >= self.stop:
            return ''
        length = self._limit(length)
        self.fp.seek(self.pos)
        data = self.fp.readline(length)
        self.pos = self.fp.tell()
        return data

    def readlines(self, sizehint = -1):
        """Return the remaining lines as a list.

        With a non-negative sizehint, reading stops after the line that
        brings the accumulated length up to sizehint; at least one line is
        returned if any is available.
        """
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            if sizehint >= 0:
                sizehint = sizehint - len(line)
                if sizehint <= 0:
                    break
        return lines

    def tell(self):
        """Return the current position relative to the window start."""
        return self.pos - self.start

    def seek(self, pos, whence=0):
        """Move the window position.

        whence 0 is relative to the window start, 1 to the current
        position, 2 to the window end.  Any other whence is ignored.
        """
        if whence == 0:
            self.pos = self.start + pos
        elif whence == 1:
            self.pos = self.pos + pos
        elif whence == 2:
            self.pos = self.stop + pos

    def close(self):
        # Only drops this window's reference; the underlying file object
        # is not closed here.
        del self.fp
Guido van Rossumc7b68821994-04-28 09:53:33 +000093
Guido van Rossumc7b68821994-04-28 09:53:33 +000094
# Recommended to use PortableUnixMailbox instead!
class UnixMailbox(_Mailbox):
    """Unix mailbox: each message begins at a line accepted as a From_ line.

    A line is a candidate when it starts with "From "; the final decision
    is delegated to self._isrealfromline().
    """

    def _search_start(self):
        # Advance to the next accepted From_ line and leave the file
        # positioned at its first character.
        while True:
            mark = self.fp.tell()
            line = self.fp.readline()
            if not line:
                raise EOFError
            if line.startswith('From ') and self._isrealfromline(line):
                break
        self.fp.seek(mark)

    def _search_end(self):
        self.fp.readline()      # Throw away header line
        # Stop in front of the following From_ line, or at end of file.
        while True:
            mark = self.fp.tell()
            line = self.fp.readline()
            if not line:
                return
            if line.startswith('From ') and self._isrealfromline(line):
                self.fp.seek(mark)
                return

    # An overridable mechanism to test for From-line-ness.  You can either
    # specify a different regular expression or define a whole new
    # _isrealfromline() method.  Note that this only gets called for lines
    # starting with the 5 characters "From ".
    #
    # BAW: According to
    #http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
    # the only portable, reliable way to find message delimiters in a BSD
    # (i.e. Unix mailbox) style folder is to search for "\n\nFrom .*\n", or
    # at the beginning of the file, "^From .*\n".  While _fromlinepattern
    # below seems like a good idea, in practice there are too many
    # variations for stricter parsing of the line to be completely accurate.
    #
    # _strict_isrealfromline() is the old version, which tries to do
    # stricter parsing of the From_ line.  _portable_isrealfromline() simply
    # returns true, since it is never called unless the line already starts
    # with "From ".
    #
    # This algorithm, and the way it interacts with _search_start() and
    # _search_end(), may not be completely correct, because it doesn't check
    # that the two characters preceding "From " are \n\n or the beginning of
    # the file.  Fixing this would require a more extensive rewrite than is
    # necessary.  This class keeps the older, stricter _fromlinepattern
    # check; PortableUnixMailbox selects the permissive one.

    _fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \
                       r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$"
    _regexp = None

    def _strict_isrealfromline(self, line):
        # Compile the pattern on first use and remember it on the instance.
        matcher = self._regexp
        if not matcher:
            import re
            matcher = self._regexp = re.compile(self._fromlinepattern)
        return matcher.match(line)

    def _portable_isrealfromline(self, line):
        return True

    _isrealfromline = _strict_isrealfromline
157
158
class PortableUnixMailbox(UnixMailbox):
    """UnixMailbox that accepts every line starting with "From "."""

    # Swap the strict regular-expression test for the always-true one.
    _isrealfromline = UnixMailbox._portable_isrealfromline
161
Guido van Rossumfbe63de1998-04-03 16:04:05 +0000162
class MmdfMailbox(_Mailbox):
    """Mailbox whose messages sit between lines of four control-A
    (octal 001) characters."""

    def _search_start(self):
        # Consume lines up to and including the opening delimiter, so the
        # file is left at the first line of the message itself.
        while True:
            line = self.fp.readline()
            if not line:
                raise EOFError
            if line[:5] == '\001\001\001\001\n':
                break

    def _search_end(self):
        # Leave the file positioned in front of the closing delimiter, or
        # at end of file when no delimiter follows.
        while True:
            mark = self.fp.tell()
            line = self.fp.readline()
            if not line:
                return
            if line == '\001\001\001\001\n':
                self.fp.seek(mark)
                return
Guido van Rossumc7b68821994-04-28 09:53:33 +0000181
Guido van Rossumc7b68821994-04-28 09:53:33 +0000182
class MHMailbox:
    """Mailbox made of one file per message, each named by a number,
    all inside a single directory."""

    def __init__(self, dirname, factory=rfc822.Message):
        import re
        numeric = re.compile('^[1-9][0-9]*$')
        self.dirname = dirname
        # Keep only the all-digit names and order them by numeric value
        # rather than as strings.
        numbers = [long(name) for name in os.listdir(self.dirname)
                   if numeric.match(name)]
        numbers.sort()
        # This only works in Python 1.6 or later;
        # before that str() added 'L':
        self.boxes = [str(number) for number in numbers]
        self.factory = factory

    def __iter__(self):
        # next() returns None when no files are left.
        return iter(self.next, None)

    def next(self):
        """Open the lowest-numbered remaining file and return
        factory(file), or None when every file has been handed out."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(os.path.join(self.dirname, fn))
        return self.factory(fp)
Guido van Rossum8ca84201998-03-26 20:56:10 +0000209
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000210
class Maildir:
    # Qmail directory mailbox

    def __init__(self, dirname, factory=rfc822.Message):
        self.dirname = dirname
        self.factory = factory

        # Collect new mail first, then the current mail in this maildir.
        # Entries whose name starts with a dot are left out.
        paths = []
        for sub in ('new', 'cur'):
            subdir = os.path.join(self.dirname, sub)
            for entry in os.listdir(subdir):
                if entry[0] != '.':
                    paths.append(os.path.join(subdir, entry))

        self.boxes = paths

    def __iter__(self):
        # next() returns None when no files are left.
        return iter(self.next, None)

    def next(self):
        """Open the next collected file and return factory(file), or None
        when every file has been handed out."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(fn)
        return self.factory(fp)
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000240
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000241
class BabylMailbox(_Mailbox):
    """Mailbox whose messages start after a '*** EOOH ***' line and end
    before a line made of the characters octal 037 and octal 014."""

    def _search_start(self):
        # Consume lines up to and including the EOOH marker, so the file
        # is left at the first line that follows it.
        while True:
            line = self.fp.readline()
            if not line:
                raise EOFError
            if line == '*** EOOH ***\n':
                break

    def _search_end(self):
        # Leave the file positioned in front of the terminator line, or at
        # end of file when no terminator follows.
        while True:
            mark = self.fp.tell()
            line = self.fp.readline()
            if not line:
                return
            if line == '\037\014\n':
                self.fp.seek(mark)
                return
Guido van Rossumfdf58fe1997-05-15 14:33:09 +0000260
261
def _test():
    """Command-line self-test: list a mailbox or print one message body.

    sys.argv[1], if present, names the mailbox; otherwise the first of
    $MAILDIR, $MAIL, $LOGNAME and $USER that is set is used.  A name
    starting with '+' is looked up under $HOME/Mail/, and a name without
    any '/' under /var/mail/ (falling back to /usr/mail/).

    sys.argv[2], if present, is the 1-based number of the message whose
    body is written to standard output; without it a one-line summary of
    every message is printed.
    """
    import sys

    args = sys.argv[1:]
    if args:
        mbox = args[0]
    else:
        for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER':
            if key in os.environ:
                mbox = os.environ[key]
                break
        else:
            # Name every variable that was actually consulted above.
            print("$MAILDIR, $MAIL, $LOGNAME nor $USER set -- who are you?")
            return

    if mbox[:1] == '+':
        mbox = os.environ['HOME'] + '/Mail/' + mbox[1:]
    elif '/' not in mbox:
        if os.path.isfile('/var/mail/' + mbox):
            mbox = '/var/mail/' + mbox
        else:
            mbox = '/usr/mail/' + mbox

    # A directory holding a 'cur' subdirectory is treated as a Maildir,
    # any other directory as an MH folder, and a plain file as a Unix
    # mailbox.
    if os.path.isdir(mbox):
        if os.path.isdir(os.path.join(mbox, 'cur')):
            mb = Maildir(mbox)
        else:
            mb = MHMailbox(mbox)
    else:
        fp = open(mbox, 'r')
        mb = PortableUnixMailbox(fp)

    msgs = []
    while 1:
        msg = mb.next()
        if msg is None:
            break
        msgs.append(msg)
        if len(args) <= 1:
            # Summary mode only calls getheader(), so drop the reference
            # to the underlying file.
            msg.fp = None

    if len(args) > 1:
        num = int(args[1])
        print('Message %d body:' % num)
        msg = msgs[num-1]
        msg.rewindbody()
        sys.stdout.write(msg.fp.read())
    else:
        print('Mailbox %s has %d messages:' % (mbox, len(msgs)))
        for msg in msgs:
            f = msg.getheader('from') or ""
            s = msg.getheader('subject') or ""
            d = msg.getheader('date') or ""
            print('-%20.20s %20.20s %-30.30s' % (f, d[5:], s))
Guido van Rossum62448671996-09-17 21:33:15 +0000313
314
# Run the self-test only when this file is executed as a script.
if __name__ == '__main__':
    _test()