blob: 29322ed16be47732879cbd615aa57fcf7c0839b6 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum62448671996-09-17 21:33:15 +00002
3"""Classes to handle Unix style, MMDF style, and MH style mailboxes."""
4
5
Guido van Rossumc7b68821994-04-28 09:53:33 +00006import rfc822
Jack Jansen97157791995-10-23 13:59:53 +00007import os
Guido van Rossumc7b68821994-04-28 09:53:33 +00008
# Names exported by a star-import of this module.  PortableUnixMailbox is a
# public class defined below and therefore belongs in this list as well.
__all__ = ["UnixMailbox", "MmdfMailbox", "MHMailbox", "Maildir",
           "BabylMailbox", "PortableUnixMailbox"]
10
class _Mailbox:
    """Base class for mailboxes that keep all messages in one open file.

    Subclasses must provide two methods that operate on self.fp:

    _search_start() -- advance the file to the first byte of the next
                       message, or raise EOFError when there is none.
    _search_end()   -- advance the file to the first byte past the end of
                       the current message.
    """

    def __init__(self, fp, factory=rfc822.Message):
        """Wrap the open file fp.

        factory is called with a file-like object covering exactly one
        message and its result is what next() returns.
        """
        self.fp = fp
        self.factory = factory
        # Offset in fp at which the search for the next message resumes.
        self.seekp = 0

    def next(self):
        """Return the next message, or None when the mailbox is exhausted."""
        while 1:
            # Always restart from the remembered offset: the file position
            # may have moved while a previously returned message was read.
            self.fp.seek(self.seekp)
            try:
                self._search_start()
            except EOFError:
                self.seekp = self.fp.tell()
                return None
            begin = self.fp.tell()
            self._search_end()
            end = self.fp.tell()
            self.seekp = end
            if begin != end:
                return self.factory(_Subfile(self.fp, begin, end))
            # An empty span is not a message; keep looking.
Guido van Rossumc7b68821994-04-28 09:53:33 +000031
Guido van Rossumc7b68821994-04-28 09:53:33 +000032
33class _Subfile:
Fred Drakedbbf76b2000-07-09 16:44:26 +000034 def __init__(self, fp, start, stop):
35 self.fp = fp
36 self.start = start
37 self.stop = stop
38 self.pos = self.start
Guido van Rossum8ca84201998-03-26 20:56:10 +000039
Fred Drakedbbf76b2000-07-09 16:44:26 +000040 def read(self, length = None):
41 if self.pos >= self.stop:
42 return ''
43 remaining = self.stop - self.pos
44 if length is None or length < 0:
45 length = remaining
46 elif length > remaining:
47 length = remaining
48 self.fp.seek(self.pos)
49 data = self.fp.read(length)
50 self.pos = self.fp.tell()
51 return data
Guido van Rossumc7b68821994-04-28 09:53:33 +000052
Fred Drakedbbf76b2000-07-09 16:44:26 +000053 def readline(self, length = None):
54 if self.pos >= self.stop:
55 return ''
56 if length is None:
57 length = self.stop - self.pos
58 self.fp.seek(self.pos)
59 data = self.fp.readline(length)
60 self.pos = self.fp.tell()
61 return data
Guido van Rossumc7b68821994-04-28 09:53:33 +000062
Fred Drakedbbf76b2000-07-09 16:44:26 +000063 def readlines(self, sizehint = -1):
64 lines = []
65 while 1:
66 line = self.readline()
67 if not line:
68 break
69 lines.append(line)
70 if sizehint >= 0:
71 sizehint = sizehint - len(line)
72 if sizehint <= 0:
73 break
74 return lines
Guido van Rossumc7b68821994-04-28 09:53:33 +000075
Fred Drakedbbf76b2000-07-09 16:44:26 +000076 def tell(self):
77 return self.pos - self.start
Guido van Rossume256a0f1999-03-24 16:20:45 +000078
Fred Drakedbbf76b2000-07-09 16:44:26 +000079 def seek(self, pos, whence=0):
80 if whence == 0:
81 self.pos = self.start + pos
82 elif whence == 1:
83 self.pos = self.pos + pos
84 elif whence == 2:
85 self.pos = self.stop + pos
Guido van Rossumc7b68821994-04-28 09:53:33 +000086
Fred Drakedbbf76b2000-07-09 16:44:26 +000087 def close(self):
88 del self.fp
Guido van Rossumc7b68821994-04-28 09:53:33 +000089
Guido van Rossumc7b68821994-04-28 09:53:33 +000090
class UnixMailbox(_Mailbox):
    """Unix style mailbox: every message starts at a "From " line.

    A line that starts with "From " only counts as a delimiter when
    _isrealfromline() accepts it.
    """

    def _search_start(self):
        """Leave the file positioned at the next delimiter line.

        Raises EOFError when no further delimiter line exists.
        """
        offset = self.fp.tell()
        line = self.fp.readline()
        while line:
            if line[:5] == 'From ' and self._isrealfromline(line):
                # Rewind so the delimiter line is part of the message.
                self.fp.seek(offset)
                return
            offset = self.fp.tell()
            line = self.fp.readline()
        raise EOFError

    def _search_end(self):
        """Leave the file positioned at the following delimiter line, or
        at end of file when this is the last message."""
        self.fp.readline()      # Throw away header line
        offset = self.fp.tell()
        line = self.fp.readline()
        while line:
            if line[:5] == 'From ' and self._isrealfromline(line):
                # The next message begins here; do not consume its header.
                self.fp.seek(offset)
                return
            offset = self.fp.tell()
            line = self.fp.readline()

    # An overridable mechanism to test for From-line-ness.  You can either
    # specify a different regular expression or define a whole new
    # _isrealfromline() method.  Note that this only gets called for lines
    # starting with the 5 characters "From ".
    #
    # BAW: According to
    # http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
    # the only portable, reliable way to find message delimiters in a BSD
    # (i.e. Unix mailbox) style folder is to search for "\n\nFrom .*\n", or,
    # at the beginning of the file, "^From .*\n".  While _fromlinepattern
    # below seems like a good idea, in practice there are too many
    # variations for stricter parsing of the line to be completely accurate.
    #
    # _strict_isrealfromline() is the older test, which parses the From_
    # line with _fromlinepattern.  _portable_isrealfromline() simply returns
    # true, since it is never called unless the line already starts with
    # "From ".
    #
    # This algorithm, and the way it interacts with _search_start() and
    # _search_end(), may not be completely correct, because it does not
    # check that the two characters preceding "From " are \n\n or the
    # beginning of the file.  Fixing this would require a more extensive
    # rewrite.  This class uses the strict test; a subclass selects the
    # portable one by rebinding _isrealfromline.

    _fromlinepattern = (r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+"
                        r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$")
    # Compiled form of _fromlinepattern, created on first use.
    _regexp = None

    def _strict_isrealfromline(self, line):
        """Return a match object if line matches _fromlinepattern, else
        None."""
        matcher = self._regexp
        if not matcher:
            import re
            matcher = self._regexp = re.compile(self._fromlinepattern)
        return matcher.match(line)

    def _portable_isrealfromline(self, line):
        """Accept every candidate line."""
        return 1

    _isrealfromline = _strict_isrealfromline
152
153
class PortableUnixMailbox(UnixMailbox):
    """UnixMailbox that treats every line starting with "From " as a
    message delimiter."""

    def _isrealfromline(self, line):
        # Use the permissive test defined on UnixMailbox instead of the
        # strict regular-expression test that UnixMailbox itself selects.
        return UnixMailbox._portable_isrealfromline(self, line)
156
Guido van Rossumfbe63de1998-04-03 16:04:05 +0000157
class MmdfMailbox(_Mailbox):
    """MMDF style mailbox: messages are fenced by lines that consist of
    four \\001 characters."""

    def _search_start(self):
        """Consume lines up to and including the next delimiter line.

        Raises EOFError when the file ends before a delimiter is found.
        """
        line = self.fp.readline()
        while line:
            if line[:5] == '\001\001\001\001\n':
                # The message text starts right after the delimiter.
                return
            line = self.fp.readline()
        raise EOFError

    def _search_end(self):
        """Leave the file positioned at the next delimiter line, or at end
        of file when there is none."""
        offset = self.fp.tell()
        line = self.fp.readline()
        while line:
            if line == '\001\001\001\001\n':
                # Back up so the delimiter is not part of the message.
                self.fp.seek(offset)
                return
            offset = self.fp.tell()
            line = self.fp.readline()
Guido van Rossumc7b68821994-04-28 09:53:33 +0000176
Guido van Rossumc7b68821994-04-28 09:53:33 +0000177
class MHMailbox:
    """MH style mailbox: a directory holding one message per file, where
    message files are named with positive decimal numbers."""

    def __init__(self, dirname, factory=rfc822.Message):
        """Collect the message files found in dirname.

        factory is called with the open file of each message and its
        result is what next() returns.
        """
        import re
        is_message_name = re.compile('^[1-9][0-9]*$').match
        self.dirname = dirname
        # Convert the names to numbers so that the messages are ordered
        # numerically ("10" after "9") rather than lexically.
        numbers = [long(name) for name in os.listdir(self.dirname)
                   if is_message_name(name)]
        numbers.sort()
        # This only works in Python 1.6 or later;
        # before that str() added 'L':
        self.boxes = [str(number) for number in numbers]
        self.factory = factory

    def next(self):
        """Return the next message, or None when none are left."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(os.path.join(self.dirname, fn))
        return self.factory(fp)
Guido van Rossum8ca84201998-03-26 20:56:10 +0000201
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000202
class Maildir:
    """Qmail directory mailbox: one file per message, kept in the 'new'
    and 'cur' subdirectories."""

    def __init__(self, dirname, factory=rfc822.Message):
        """Collect the message files found under dirname.

        factory is called with the open file of each message and its
        result is what next() returns.
        """
        self.dirname = dirname
        self.factory = factory

        # New mail is listed first, then the current mail.  Entries whose
        # name starts with a dot are skipped.
        paths = []
        for subdir in ('new', 'cur'):
            folder = os.path.join(self.dirname, subdir)
            for entry in os.listdir(folder):
                if entry[0] != '.':
                    paths.append(os.path.join(folder, entry))

        self.boxes = paths

    def next(self):
        """Return the next message, or None when none are left."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(fn)
        return self.factory(fp)
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000229
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000230
class BabylMailbox(_Mailbox):
    """Babyl mailbox: a message starts after a '*** EOOH ***' line and
    ends before a line made of the characters \\037 and \\014."""

    def _search_start(self):
        """Consume lines up to and including the next '*** EOOH ***' line.

        Raises EOFError when the file ends before such a line is found.
        """
        line = self.fp.readline()
        while line:
            if line == '*** EOOH ***\n':
                # The message text starts right after this marker.
                return
            line = self.fp.readline()
        raise EOFError

    def _search_end(self):
        """Leave the file positioned at the next terminator line, or at
        end of file when there is none."""
        offset = self.fp.tell()
        line = self.fp.readline()
        while line:
            if line == '\037\014\n':
                # Back up so the terminator is not part of the message.
                self.fp.seek(offset)
                return
            offset = self.fp.tell()
            line = self.fp.readline()
Guido van Rossumfdf58fe1997-05-15 14:33:09 +0000249
250
def _test():
    """Command-line self-test: summarize a mailbox or print one message.

    Arguments are taken from sys.argv:

    (none)  -- use the value of the first of $MAILDIR, $MAIL, $LOGNAME and
               $USER that is set as the mailbox name
    MBOX    -- print a one-line summary of every message in MBOX
    MBOX N  -- print the body of message number N (counting from 1)

    A name starting with '+' is looked up under $HOME/Mail/, a name without
    any '/' under /usr/mail/.  A directory containing a 'cur' subdirectory
    is read as a Maildir, any other directory as an MH mailbox, and a plain
    file as a Unix mailbox.
    """
    import sys
    import os

    args = sys.argv[1:]
    if args:
        mbox = args[0]
    else:
        for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER':
            if key in os.environ:
                mbox = os.environ[key]
                break
        else:
            # Name every variable that was actually consulted.
            print("$MAILDIR, $MAIL, $LOGNAME nor $USER set -- who are you?")
            return
    if mbox[:1] == '+':
        mbox = os.environ['HOME'] + '/Mail/' + mbox[1:]
    elif '/' not in mbox:
        mbox = '/usr/mail/' + mbox
    if os.path.isdir(mbox):
        if os.path.isdir(os.path.join(mbox, 'cur')):
            mb = Maildir(mbox)
        else:
            mb = MHMailbox(mbox)
    else:
        fp = open(mbox, 'r')
        mb = UnixMailbox(fp)

    want_body = len(args) > 1
    msgs = []
    while 1:
        msg = mb.next()
        if msg is None:
            break
        msgs.append(msg)
        if not want_body:
            # Only the headers are needed for the summary, so drop the
            # reference to the message's file object.
            msg.fp = None
    if want_body:
        num = int(args[1])
        print('Message %d body:' % num)
        msg = msgs[num-1]
        msg.rewindbody()
        sys.stdout.write(msg.fp.read())
    else:
        print('Mailbox %s has %d messages:' % (mbox, len(msgs)))
        for msg in msgs:
            f = msg.getheader('from') or ""
            s = msg.getheader('subject') or ""
            d = msg.getheader('date') or ""
            # d[5:] skips the first five characters of the Date header.
            print('-%20.20s %20.20s %-30.30s' % (f, d[5:], s))
Guido van Rossum62448671996-09-17 21:33:15 +0000301
302
# Run the self-test only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    _test()