blob: 2f96106c6e807b2a365039a28a88dab4898d1f16 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum62448671996-09-17 21:33:15 +00002
"""Classes to handle Unix style, MMDF style, MH style, Maildir style,
and Babyl style mailboxes."""
4
5
Guido van Rossumc7b68821994-04-28 09:53:33 +00006import rfc822
Jack Jansen97157791995-10-23 13:59:53 +00007import os
Guido van Rossumc7b68821994-04-28 09:53:33 +00008
# Public names exported by "from mailbox import *".  PortableUnixMailbox is
# a public class of this module and is therefore listed with the others.
__all__ = ["UnixMailbox", "MmdfMailbox", "MHMailbox", "Maildir",
           "BabylMailbox", "PortableUnixMailbox"]
10
class _Mailbox:
    """Base class for mailboxes that keep all messages in one file.

    Subclasses provide _search_start() and _search_end().  Both operate on
    self.fp: _search_start() positions it where the next message begins
    (raising EOFError when there is none), and _search_end() positions it
    where that message ends.
    """

    def __init__(self, fp, factory=rfc822.Message):
        """Scan the open file fp; build each message with factory.

        factory is called with one argument, a file-like object limited
        to the extent of a single message.
        """
        self.fp = fp
        self.seekp = 0          # offset at which next() resumes scanning
        self.factory = factory

    def seek(self, pos, whence=0):
        """Set the offset from which next() resumes scanning.

        whence follows the file.seek() convention: 1 is relative to the
        current scan offset, 2 is relative to the end of the file, and
        any other value (0 by default) is an absolute offset.
        """
        if whence == 1:         # Relative to current position
            self.seekp = self.seekp + pos
        elif whence == 2:       # Relative to file's end
            # next() always seeks to self.seekp before reading, so moving
            # the underlying file here to find its size is harmless.
            self.fp.seek(0, 2)
            self.seekp = self.fp.tell() + pos
        else:                   # Default - absolute position
            self.seekp = pos

    def next(self):
        """Return the next message, or None when the mailbox is exhausted."""
        while 1:
            self.fp.seek(self.seekp)
            try:
                self._search_start()
            except EOFError:
                # Remember where the scan stopped so that a later call
                # starts from there.
                self.seekp = self.fp.tell()
                return None
            start = self.fp.tell()
            self._search_end()
            self.seekp = stop = self.fp.tell()
            # Skip empty messages and keep looking.
            if start != stop:
                break
        return self.factory(_Subfile(self.fp, start, stop))
Guido van Rossumc7b68821994-04-28 09:53:33 +000039
Guido van Rossumc7b68821994-04-28 09:53:33 +000040
41class _Subfile:
Fred Drakedbbf76b2000-07-09 16:44:26 +000042 def __init__(self, fp, start, stop):
43 self.fp = fp
44 self.start = start
45 self.stop = stop
46 self.pos = self.start
Guido van Rossum8ca84201998-03-26 20:56:10 +000047
Fred Drakedbbf76b2000-07-09 16:44:26 +000048 def read(self, length = None):
49 if self.pos >= self.stop:
50 return ''
51 remaining = self.stop - self.pos
52 if length is None or length < 0:
53 length = remaining
54 elif length > remaining:
55 length = remaining
56 self.fp.seek(self.pos)
57 data = self.fp.read(length)
58 self.pos = self.fp.tell()
59 return data
Guido van Rossumc7b68821994-04-28 09:53:33 +000060
Fred Drakedbbf76b2000-07-09 16:44:26 +000061 def readline(self, length = None):
62 if self.pos >= self.stop:
63 return ''
64 if length is None:
65 length = self.stop - self.pos
66 self.fp.seek(self.pos)
67 data = self.fp.readline(length)
68 self.pos = self.fp.tell()
69 return data
Guido van Rossumc7b68821994-04-28 09:53:33 +000070
Fred Drakedbbf76b2000-07-09 16:44:26 +000071 def readlines(self, sizehint = -1):
72 lines = []
73 while 1:
74 line = self.readline()
75 if not line:
76 break
77 lines.append(line)
78 if sizehint >= 0:
79 sizehint = sizehint - len(line)
80 if sizehint <= 0:
81 break
82 return lines
Guido van Rossumc7b68821994-04-28 09:53:33 +000083
Fred Drakedbbf76b2000-07-09 16:44:26 +000084 def tell(self):
85 return self.pos - self.start
Guido van Rossume256a0f1999-03-24 16:20:45 +000086
Fred Drakedbbf76b2000-07-09 16:44:26 +000087 def seek(self, pos, whence=0):
88 if whence == 0:
89 self.pos = self.start + pos
90 elif whence == 1:
91 self.pos = self.pos + pos
92 elif whence == 2:
93 self.pos = self.stop + pos
Guido van Rossumc7b68821994-04-28 09:53:33 +000094
Fred Drakedbbf76b2000-07-09 16:44:26 +000095 def close(self):
96 del self.fp
Guido van Rossumc7b68821994-04-28 09:53:33 +000097
Guido van Rossumc7b68821994-04-28 09:53:33 +000098
class UnixMailbox(_Mailbox):
    """Mailbox whose messages are each introduced by a "From " line."""

    def _search_start(self):
        """Position the file on the next accepted "From " line.

        Raises EOFError if the end of the file is reached first.
        """
        fp = self.fp
        while 1:
            mark = fp.tell()
            line = fp.readline()
            if not line:
                raise EOFError
            if line.startswith('From ') and self._isrealfromline(line):
                break
        # Rewind so that the delimiter line is part of the message.
        fp.seek(mark)

    def _search_end(self):
        """Position the file on the following "From " line, or at EOF."""
        fp = self.fp
        fp.readline()           # Throw away header line
        mark = fp.tell()
        line = fp.readline()
        while line:
            if line.startswith('From ') and self._isrealfromline(line):
                # The next message starts here; leave it unread.
                fp.seek(mark)
                break
            mark = fp.tell()
            line = fp.readline()

    # An overridable mechanism to test for From-line-ness.  You can either
    # specify a different regular expression or define a whole new
    # _isrealfromline() method.  Note that this only gets called for lines
    # starting with the 5 characters "From ".
    #
    # BAW: According to
    # http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
    # the only portable, reliable way to find message delimiters in a BSD
    # (i.e. Unix mailbox) style folder is to search for "\n\nFrom .*\n", or,
    # at the beginning of the file, "^From .*\n".  While _fromlinepattern
    # below seems like a good idea, in practice there are too many
    # variations for stricter parsing of the line to be completely accurate.
    #
    # _strict_isrealfromline() is the older check, which matches the From_
    # line against _fromlinepattern.  _portable_isrealfromline() simply
    # returns true, since it is never called unless the line already starts
    # with "From ".
    #
    # This algorithm, and the way it interacts with _search_start() and
    # _search_end(), may not be completely correct, because it doesn't check
    # that the two characters preceding "From " are \n\n or the beginning of
    # the file.  Fixing this would require a more extensive rewrite than is
    # necessary.  This class uses the strict check by default; the
    # PortableUnixMailbox subclass uses the portable one instead.

    _fromlinepattern = (r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+"
                        r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$")
    _regexp = None

    def _strict_isrealfromline(self, line):
        """Return a match object if line matches _fromlinepattern."""
        matcher = self._regexp
        if not matcher:
            # Compile on first use and cache the result on the instance.
            import re
            matcher = self._regexp = re.compile(self._fromlinepattern)
        return matcher.match(line)

    def _portable_isrealfromline(self, line):
        """Accept every line; the caller already checked for "From "."""
        return 1

    _isrealfromline = _strict_isrealfromline
160
161
class PortableUnixMailbox(UnixMailbox):
    """UnixMailbox that treats every line starting with "From " as a
    message delimiter, instead of matching it against a stricter pattern."""
    _isrealfromline = UnixMailbox._portable_isrealfromline
164
Guido van Rossumfbe63de1998-04-03 16:04:05 +0000165
class MmdfMailbox(_Mailbox):
    """Mailbox whose messages are enclosed by lines of four \\001 bytes."""

    def _search_start(self):
        """Read past the next delimiter line; raise EOFError if none."""
        line = self.fp.readline()
        while line:
            if line.startswith('\001\001\001\001\n'):
                # The message begins right after the delimiter.
                return
            line = self.fp.readline()
        raise EOFError

    def _search_end(self):
        """Position the file on the closing delimiter line, or at EOF."""
        fp = self.fp
        while 1:
            mark = fp.tell()
            line = fp.readline()
            if not line:
                break
            if line == '\001\001\001\001\n':
                # Leave the delimiter itself unread.
                fp.seek(mark)
                break
Guido van Rossumc7b68821994-04-28 09:53:33 +0000184
Guido van Rossumc7b68821994-04-28 09:53:33 +0000185
class MHMailbox:
    """Mailbox stored as a directory with one numbered file per message."""

    def __init__(self, dirname, factory=rfc822.Message):
        """Collect the message files found in dirname.

        Only names made of decimal digits without a leading zero are
        used; they are kept in self.boxes in numerical order.  factory
        is called with the open file of each message.
        """
        import re
        numeric = re.compile('^[1-9][0-9]*$')
        self.dirname = dirname
        # Convert to numbers so that the sort is numerical, not textual.
        numbers = [long(name) for name in os.listdir(self.dirname)
                   if numeric.match(name)]
        numbers.sort()
        # This only works in Python 1.6 or later;
        # before that str() added 'L':
        self.boxes = [str(number) for number in numbers]
        self.factory = factory

    def next(self):
        """Return the next message, or None when none are left."""
        if not self.boxes:
            return None
        name = self.boxes.pop(0)
        return self.factory(open(os.path.join(self.dirname, name)))
Guido van Rossum8ca84201998-03-26 20:56:10 +0000209
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000210
class Maildir:
    """Qmail directory mailbox: messages live in 'new' and 'cur'."""

    def __init__(self, dirname, factory=rfc822.Message):
        """Collect the message files below dirname.

        Files from the 'new' subdirectory come first, followed by those
        from 'cur'; names starting with a dot are skipped.  factory is
        called with the open file of each message.
        """
        self.dirname = dirname
        self.factory = factory

        found = []
        # New mail first, then the mail already marked as current.
        for sub in ('new', 'cur'):
            subdir = os.path.join(self.dirname, sub)
            for entry in os.listdir(subdir):
                if entry[0] != '.':
                    found.append(os.path.join(subdir, entry))

        self.boxes = found

    def next(self):
        """Return the next message, or None when none are left."""
        if not self.boxes:
            return None
        path = self.boxes.pop(0)
        return self.factory(open(path))
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000237
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000238
class BabylMailbox(_Mailbox):
    """Mailbox whose messages start after a '*** EOOH ***' line and end
    before a '\\037\\014' line."""

    def _search_start(self):
        """Read past the next '*** EOOH ***' line; EOFError if none."""
        line = self.fp.readline()
        while line:
            if line == '*** EOOH ***\n':
                # The message begins right after this marker.
                return
            line = self.fp.readline()
        raise EOFError

    def _search_end(self):
        """Position the file on the terminating line, or at EOF."""
        fp = self.fp
        while 1:
            mark = fp.tell()
            line = fp.readline()
            if not line:
                break
            if line == '\037\014\n':
                # Leave the terminator itself unread.
                fp.seek(mark)
                break
Guido van Rossumfdf58fe1997-05-15 14:33:09 +0000257
258
Guido van Rossum62448671996-09-17 21:33:15 +0000259def _test():
Fred Drakedbbf76b2000-07-09 16:44:26 +0000260 import time
261 import sys
Fred Drakedbbf76b2000-07-09 16:44:26 +0000262 import os
Jack Jansen97157791995-10-23 13:59:53 +0000263
Fred Drakedbbf76b2000-07-09 16:44:26 +0000264 args = sys.argv[1:]
265 if not args:
266 for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER':
267 if os.environ.has_key(key):
268 mbox = os.environ[key]
269 break
Fred Drake13a2c272000-02-10 17:17:14 +0000270 else:
Fred Drakedbbf76b2000-07-09 16:44:26 +0000271 print "$MAIL, $LOGNAME nor $USER set -- who are you?"
272 return
273 else:
274 mbox = args[0]
275 if mbox[:1] == '+':
276 mbox = os.environ['HOME'] + '/Mail/' + mbox[1:]
277 elif not '/' in mbox:
278 mbox = '/usr/mail/' + mbox
279 if os.path.isdir(mbox):
280 if os.path.isdir(os.path.join(mbox, 'cur')):
281 mb = Maildir(mbox)
Fred Drake13a2c272000-02-10 17:17:14 +0000282 else:
Fred Drakedbbf76b2000-07-09 16:44:26 +0000283 mb = MHMailbox(mbox)
284 else:
285 fp = open(mbox, 'r')
286 mb = UnixMailbox(fp)
287
288 msgs = []
289 while 1:
290 msg = mb.next()
291 if msg is None:
292 break
293 msgs.append(msg)
294 if len(args) <= 1:
295 msg.fp = None
296 if len(args) > 1:
Fred Drakede3518e2000-10-23 13:37:01 +0000297 num = int(args[1])
Fred Drakedbbf76b2000-07-09 16:44:26 +0000298 print 'Message %d body:'%num
299 msg = msgs[num-1]
300 msg.rewindbody()
301 sys.stdout.write(msg.fp.read())
302 else:
303 print 'Mailbox',mbox,'has',len(msgs),'messages:'
304 for msg in msgs:
305 f = msg.getheader('from') or ""
306 s = msg.getheader('subject') or ""
307 d = msg.getheader('date') or ""
Fred Drakecc4adf22000-09-30 23:59:04 +0000308 print '-%20.20s %20.20s %-30.30s'%(f, d[5:], s)
Guido van Rossum62448671996-09-17 21:33:15 +0000309
310
# Call _test() when this module is executed as a script rather than imported.
if __name__ == '__main__':
    _test()