blob: 2c80a625e1ea55a01076ed1c1b716b56714fee3d [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum62448671996-09-17 21:33:15 +00002
3"""Classes to handle Unix style, MMDF style, and MH style mailboxes."""
4
5
Guido van Rossumc7b68821994-04-28 09:53:33 +00006import rfc822
Jack Jansen97157791995-10-23 13:59:53 +00007import os
Guido van Rossumc7b68821994-04-28 09:53:33 +00008
# Public names exported by a star-import of this module.  Every public
# mailbox class defined below is listed, including PortableUnixMailbox.
__all__ = ["UnixMailbox", "MmdfMailbox", "MHMailbox", "Maildir",
           "BabylMailbox", "PortableUnixMailbox"]
10
class _Mailbox:
    """Base class for mailboxes that keep all messages in one seekable file.

    Subclasses provide two methods that move self.fp:

    _search_start() -- leave fp at the first byte of the next message;
                       raise EOFError when there is no further message
    _search_end()   -- leave fp just past the last byte of that message
    """

    def __init__(self, fp, factory=rfc822.Message):
        """Store the mailbox file and the message factory.

        fp      -- open file object; seek() and tell() are used here
        factory -- callable applied to a file-like view of one message;
                   whatever it returns is what next() hands back
        """
        self.fp = fp
        self.seekp = 0          # offset in fp where the next search starts
        self.factory = factory

    def __iter__(self):
        """Return an iterator over the messages not yet consumed.

        next() reports exhaustion by returning None rather than by raising
        StopIteration, so returning self here would make a ``for`` loop
        spin forever once the mailbox ran out.  The two-argument form of
        iter() calls next() repeatedly and stops at the None sentinel.
        """
        return iter(self.next, None)

    def next(self):
        """Return the next message, or None when none is left."""
        while 1:
            self.fp.seek(self.seekp)
            try:
                self._search_start()
            except EOFError:
                # Remember where the scan stopped so a later call resumes
                # from there.
                self.seekp = self.fp.tell()
                return None
            start = self.fp.tell()
            self._search_end()
            self.seekp = stop = self.fp.tell()
            # A zero-length span is not a message; keep scanning.
            if start != stop:
                break
        return self.factory(_Subfile(self.fp, start, stop))
Guido van Rossumc7b68821994-04-28 09:53:33 +000034
Guido van Rossumc7b68821994-04-28 09:53:33 +000035
class _Subfile:
    """Read-only, file-like window onto the range [start, stop) of fp.

    Offsets given to seek() and returned by tell() are relative to start.
    The window tracks its own position and re-seeks fp before every read,
    so it does not rely on fp's position being left alone between calls.
    """

    def __init__(self, fp, start, stop):
        """Create a window on fp covering absolute offsets start..stop."""
        self.fp = fp
        self.start = start
        self.stop = stop
        self.pos = self.start   # absolute offset in fp of the next read

    def _limit(self, length):
        """Return length clamped to what is left before stop.

        None, a negative value, or a value larger than the remainder all
        mean "everything up to stop".
        """
        remaining = self.stop - self.pos
        if length is None or length < 0 or length > remaining:
            return remaining
        return length

    def read(self, length = None):
        """Read up to length characters, never going past stop.

        Returns '' once the position has reached stop.
        """
        if self.pos >= self.stop:
            return ''
        length = self._limit(length)
        self.fp.seek(self.pos)
        data = self.fp.read(length)
        self.pos = self.fp.tell()
        return data

    def readline(self, length = None):
        """Read one line, or at most length characters of it.

        The length is clamped to the window exactly as in read(), so an
        over-large explicit length cannot return data beyond stop.
        Returns '' once the position has reached stop.
        """
        if self.pos >= self.stop:
            return ''
        length = self._limit(length)
        self.fp.seek(self.pos)
        data = self.fp.readline(length)
        self.pos = self.fp.tell()
        return data

    def readlines(self, sizehint = -1):
        """Return the remaining lines as a list.

        With a non-negative sizehint, stop after the line that uses up
        the hint; at least one line is returned if any is available.
        """
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            if sizehint >= 0:
                sizehint = sizehint - len(line)
                if sizehint <= 0:
                    break
        return lines

    def tell(self):
        """Return the current position relative to the window start."""
        return self.pos - self.start

    def seek(self, pos, whence=0):
        """Move the position; whence is 0 (start), 1 (current) or 2 (stop).

        Any other whence value leaves the position unchanged.
        """
        if whence == 0:
            self.pos = self.start + pos
        elif whence == 1:
            self.pos = self.pos + pos
        elif whence == 2:
            self.pos = self.stop + pos

    def close(self):
        """Drop the reference to fp without closing it.

        Calling close() again is harmless.
        """
        if hasattr(self, 'fp'):
            del self.fp
Guido van Rossumc7b68821994-04-28 09:53:33 +000092
Guido van Rossumc7b68821994-04-28 09:53:33 +000093
class UnixMailbox(_Mailbox):
    """Mailbox in the traditional Unix format.

    A message starts at a line accepted as a From_ line and runs up to
    the next such line or to the end of the file.
    """

    def _search_start(self):
        """Leave fp at the start of the next From_ line.

        Raises EOFError when the file ends before one is found.
        """
        fp = self.fp
        while 1:
            here = fp.tell()
            text = fp.readline()
            if not text:
                raise EOFError
            if text.startswith('From ') and self._isrealfromline(text):
                # Rewind so the From_ line itself belongs to the message.
                fp.seek(here)
                return

    def _search_end(self):
        """Leave fp just past the message whose From_ line fp is on."""
        fp = self.fp
        fp.readline()           # skip this message's own From_ line
        while 1:
            here = fp.tell()
            text = fp.readline()
            if not text:
                return          # end of file closes the last message
            if text.startswith('From ') and self._isrealfromline(text):
                # The next message starts here; stop in front of it.
                fp.seek(here)
                return

    # _isrealfromline() decides whether a line that already starts with the
    # five characters "From " really opens a message.  Override it, or
    # change _fromlinepattern, to alter that decision.
    #
    # BAW: According to
    # http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
    # the only portable, reliable way to find message delimiters in a BSD
    # (i.e. Unix mailbox) style folder is to search for "\n\nFrom .*\n", or,
    # at the beginning of the file, "^From .*\n".  Stricter parsing of the
    # line, as _fromlinepattern attempts, runs into too many real-world
    # variations to be completely accurate.
    #
    # Two implementations are provided.  _strict_isrealfromline() applies
    # _fromlinepattern and is the one this class uses.
    # _portable_isrealfromline() accepts every candidate line; the
    # PortableUnixMailbox subclass selects it.
    #
    # Neither is completely correct when combined with _search_start() and
    # _search_end(), because nothing checks that "From " is preceded by a
    # blank line or by the beginning of the file.  Fixing that would need a
    # more extensive rewrite.

    _fromlinepattern = (r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+"
                        r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$")
    _regexp = None              # compiled lazily on first strict check

    def _strict_isrealfromline(self, line):
        """Return a match object if line fits _fromlinepattern, else None."""
        compiled = self._regexp
        if not compiled:
            import re
            compiled = self._regexp = re.compile(self._fromlinepattern)
        return compiled.match(line)

    def _portable_isrealfromline(self, line):
        """Accept any line; callers only pass lines starting with "From "."""
        return 1

    _isrealfromline = _strict_isrealfromline
155
156
class PortableUnixMailbox(UnixMailbox):
    """Unix mailbox in which every line starting with "From " delimits a
    message; the stricter From_ line format check is not applied."""

    def _isrealfromline(self, line):
        # Delegate to the lenient check defined on UnixMailbox.
        return UnixMailbox._portable_isrealfromline(self, line)
159
Guido van Rossumfbe63de1998-04-03 16:04:05 +0000160
class MmdfMailbox(_Mailbox):
    """Mailbox in which a line of four \\001 characters brackets messages."""

    def _search_start(self):
        """Consume lines up to and including the next delimiter line.

        Raises EOFError if the file ends first.
        """
        fp = self.fp
        text = fp.readline()
        while text:
            if text[:5] == '\001\001\001\001\n':
                return
            text = fp.readline()
        raise EOFError

    def _search_end(self):
        """Leave fp in front of the next delimiter line, or at end of file."""
        fp = self.fp
        here = fp.tell()
        text = fp.readline()
        while text:
            if text == '\001\001\001\001\n':
                # Back up so the delimiter is not part of the message.
                fp.seek(here)
                return
            here = fp.tell()
            text = fp.readline()
Guido van Rossumc7b68821994-04-28 09:53:33 +0000179
Guido van Rossumc7b68821994-04-28 09:53:33 +0000180
class MHMailbox:
    """Directory mailbox with one message per numerically named file."""

    def __init__(self, dirname, factory=rfc822.Message):
        """List the message files in dirname in numeric order.

        dirname -- directory to scan
        factory -- callable applied to each opened message file; its
                   result is what next() returns
        """
        import re
        # \Z instead of $: $ would also accept a name ending in a newline.
        pat = re.compile(r'^[1-9][0-9]*\Z')
        self.dirname = dirname
        # Accepted names have no leading zeros, so ordering them by
        # (length, text) is the same as ordering them by numeric value.
        # That avoids converting every name to a number and back.
        keyed = [(len(name), name)
                 for name in os.listdir(self.dirname) if pat.match(name)]
        keyed.sort()
        self.boxes = [name for (length, name) in keyed]
        self.factory = factory

    def __iter__(self):
        """Return an iterator over the messages not yet consumed.

        next() returns None when exhausted instead of raising
        StopIteration, so it is wrapped with the sentinel form of iter();
        returning self would make a ``for`` loop run forever.
        """
        return iter(self.next, None)

    def next(self):
        """Open the next message file and return factory(file), or None."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(os.path.join(self.dirname, fn))
        return self.factory(fp)
Guido van Rossum8ca84201998-03-26 20:56:10 +0000207
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000208
class Maildir:
    """Qmail directory mailbox: one message per file under new/ and cur/."""

    def __init__(self, dirname, factory=rfc822.Message):
        """Collect the message files found in dirname/new and dirname/cur.

        dirname -- the maildir directory
        factory -- callable applied to each opened message file; its
                   result is what next() returns
        """
        self.dirname = dirname
        self.factory = factory

        # New mail is listed first, then current mail.  Entries whose
        # name starts with a dot are skipped.
        boxes = []
        for subdir in ('new', 'cur'):
            path = os.path.join(self.dirname, subdir)
            boxes += [os.path.join(path, name)
                      for name in os.listdir(path)
                      if not name.startswith('.')]
        self.boxes = boxes

    def __iter__(self):
        """Return an iterator over the messages not yet consumed.

        next() returns None when exhausted instead of raising
        StopIteration, so it is wrapped with the sentinel form of iter();
        returning self would make a ``for`` loop run forever.
        """
        return iter(self.next, None)

    def next(self):
        """Open the next message file and return factory(file), or None."""
        if not self.boxes:
            return None
        fn = self.boxes.pop(0)
        fp = open(fn)
        return self.factory(fp)
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000238
Guido van Rossum9a4d6371998-12-23 22:05:42 +0000239
class BabylMailbox(_Mailbox):
    """Mailbox whose messages begin after a '*** EOOH ***' line and end
    before a '\\037\\014' line."""

    def _search_start(self):
        """Consume lines up to and including the next '*** EOOH ***' line.

        Raises EOFError if the file ends first.
        """
        fp = self.fp
        text = fp.readline()
        while text:
            if text == '*** EOOH ***\n':
                return
            text = fp.readline()
        raise EOFError

    def _search_end(self):
        """Leave fp in front of the next '\\037\\014' line, or at end of file."""
        fp = self.fp
        here = fp.tell()
        text = fp.readline()
        while text:
            if text == '\037\014\n':
                # Back up so the terminator is not part of the message.
                fp.seek(here)
                return
            here = fp.tell()
            text = fp.readline()
Guido van Rossumfdf58fe1997-05-15 14:33:09 +0000258
259
def _test():
    """Command-line self-test: list a mailbox or print one message body.

    Arguments are read from sys.argv: [mailbox [message-number]].

    Without a mailbox argument the first of $MAILDIR, $MAIL, $LOGNAME and
    $USER that is set supplies it.  A name starting with '+' is looked up
    under $HOME/Mail/, and a name without a slash under /usr/mail/.  A
    directory with a cur/ subdirectory is read as a Maildir, any other
    directory as an MH folder, and a plain file as a Unix mailbox.

    With a message number, that message's body is written to stdout;
    otherwise one summary line per message is printed.
    """
    import sys
    import os

    args = sys.argv[1:]
    if not args:
        for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER':
            if key in os.environ:
                mbox = os.environ[key]
                break
        else:
            # Name every variable that was actually consulted.
            print("$MAILDIR, $MAIL, $LOGNAME nor $USER set -- who are you?")
            return
    else:
        mbox = args[0]
    if mbox[:1] == '+':
        mbox = os.environ['HOME'] + '/Mail/' + mbox[1:]
    elif not '/' in mbox:
        mbox = '/usr/mail/' + mbox
    if os.path.isdir(mbox):
        if os.path.isdir(os.path.join(mbox, 'cur')):
            mb = Maildir(mbox)
        else:
            mb = MHMailbox(mbox)
    else:
        fp = open(mbox, 'r')
        mb = UnixMailbox(fp)

    msgs = []
    while 1:
        msg = mb.next()
        if msg is None:
            break
        msgs.append(msg)
        if len(args) <= 1:
            # Only headers are needed for the summary; drop the message's
            # file reference.
            msg.fp = None
    if len(args) > 1:
        num = int(args[1])
        # Reject numbers outside 1..len(msgs): indexing with them would
        # raise IndexError, or for 0 silently pick the last message.
        if num < 1 or num > len(msgs):
            print('No message %d: mailbox %s has %d messages'
                  % (num, mbox, len(msgs)))
            return
        print('Message %d body:' % num)
        msg = msgs[num-1]
        msg.rewindbody()
        sys.stdout.write(msg.fp.read())
    else:
        print('Mailbox %s has %d messages:' % (mbox, len(msgs)))
        for msg in msgs:
            f = msg.getheader('from') or ""
            s = msg.getheader('subject') or ""
            d = msg.getheader('date') or ""
            print('-%20.20s %20.20s %-30.30s' % (f, d[5:], s))


if __name__ == '__main__':
    _test()