blob: c8cba8582d6ff1ed7a4c7af6e42492fdfca3d13d [file] [log] [blame]
Guido van Rossumcc6764c1995-02-09 17:18:10 +00001"""Manage shelves of pickled objects.
2
3A "shelf" is a persistent, dictionary-like object. The difference
4with dbm databases is that the values (not the keys!) in a shelf can
5be essentially arbitrary Python objects -- anything that the "pickle"
6module can handle. This includes most class instances, recursive data
7types, and objects containing lots of shared sub-objects. The keys
8are ordinary strings.
9
10To summarize the interface (key is a string, data is an arbitrary
11object):
12
Fred Drake13a2c272000-02-10 17:17:14 +000013 import shelve
14 d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
Guido van Rossumcc6764c1995-02-09 17:18:10 +000015
Fred Drake13a2c272000-02-10 17:17:14 +000016 d[key] = data # store data at key (overwrites old data if
17 # using an existing key)
Tim Peters0eadaac2003-04-24 16:02:54 +000018 data = d[key] # retrieve a COPY of the data at key (raise
Martin v. Löwis153c9e42003-04-19 20:59:03 +000019 # KeyError if no such key) -- NOTE that this
20 # access returns a *copy* of the entry!
Fred Drake13a2c272000-02-10 17:17:14 +000021 del d[key] # delete data stored at key (raises KeyError
22 # if no such key)
Martin v. Löwise4913c92002-10-18 08:58:14 +000023 flag = d.has_key(key) # true if the key exists; same as "key in d"
Fred Drake13a2c272000-02-10 17:17:14 +000024 list = d.keys() # a list of all existing keys (slow!)
Guido van Rossumcc6764c1995-02-09 17:18:10 +000025
Fred Drake13a2c272000-02-10 17:17:14 +000026 d.close() # close it
Guido van Rossumcc6764c1995-02-09 17:18:10 +000027
Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.
Martin v. Löwis153c9e42003-04-19 20:59:03 +000030
31Normally, d[key] returns a COPY of the entry. This needs care when
32mutable entries are mutated: for example, if d[key] is a list,
33 d[key].append(anitem)
34does NOT modify the entry d[key] itself, as stored in the persistent
35mapping -- it only modifies the copy, which is then immediately
36discarded, so that the append has NO effect whatsoever. To append an
37item to d[key] in a way that will affect the persistent mapping, use:
38 data = d[key]
39 data.append(anitem)
40 d[key] = data
41
42To avoid the problem with mutable entries, you may pass the keyword
43argument writeback=True in the call to shelve.open. When you use:
44 d = shelve.open(filename, writeback=True)
45then d keeps a cache of all entries you access, and writes them all back
46to the persistent mapping when you call d.close(). This ensures that
47such usage as d[key].append(anitem) works as intended.
48
However, using the keyword argument writeback=True may consume vast
amounts of memory for the cache, and it may make d.close() very slow, if
you access many of d's entries after opening it in this way: d has no way
to check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access. You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
Guido van Rossumcc6764c1995-02-09 17:18:10 +000057"""
Guido van Rossuma48061a1995-01-10 00:31:14 +000058
Guido van Rossum914c9381997-06-06 21:12:45 +000059# Try using cPickle and cStringIO if available.
60
61try:
Tim Peters495ad3c2001-01-15 01:36:40 +000062 from cPickle import Pickler, Unpickler
Guido van Rossum914c9381997-06-06 21:12:45 +000063except ImportError:
Tim Peters495ad3c2001-01-15 01:36:40 +000064 from pickle import Pickler, Unpickler
Guido van Rossum914c9381997-06-06 21:12:45 +000065
66try:
Tim Peters495ad3c2001-01-15 01:36:40 +000067 from cStringIO import StringIO
Guido van Rossum914c9381997-06-06 21:12:45 +000068except ImportError:
Tim Peters495ad3c2001-01-15 01:36:40 +000069 from StringIO import StringIO
Guido van Rossuma48061a1995-01-10 00:31:14 +000070
Raymond Hettinger79947162002-11-15 06:46:14 +000071import UserDict
72
Skip Montanaro0de65802001-02-15 22:15:14 +000073__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
Guido van Rossumcc6764c1995-02-09 17:18:10 +000074
Raymond Hettinger8c664e82008-07-25 18:43:33 +000075class _ClosedDict(UserDict.DictMixin):
76 'Marker for a closed dict. Access attempts raise a ValueError.'
77
78 def closed(self, *args):
79 raise ValueError('invalid operation on closed shelf')
80 __getitem__ = __setitem__ = __delitem__ = keys = closed
81
82 def __repr__(self):
83 return '<Closed Dictionary>'
84
class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before being stored in the underlying mapping and
    unpickled when read back; keys are passed through unchanged.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        """Wrap the mapping *dict*.

        dict      -- dictionary-like object holding the pickled values
        protocol  -- pickle protocol used when storing; None means 0
        writeback -- when true, keep every accessed entry in self.cache
                     and write the cache back on sync() / close()
        """
        self.dict = dict
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        # NOTE: __del__ tests for the 'writeback' attribute to find out
        # whether __init__ got this far, so it is set after self.dict.
        self.writeback = writeback
        self.cache = {}

    def keys(self):
        """Return the keys of the underlying mapping."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying mapping."""
        return len(self.dict)

    def has_key(self, key):
        """Return true if *key* is stored; same as "key in shelf"."""
        return key in self.dict

    def __contains__(self, key):
        """Return true if *key* is stored in the underlying mapping."""
        return key in self.dict

    def get(self, key, default=None):
        """Return the value stored at *key*, or *default* if absent."""
        if key in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value stored at *key* (KeyError if no such key).

        The stored string is unpickled on every cache miss, so the result
        is a copy of the entry.  With writeback enabled the unpickled
        object is remembered in self.cache and returned on later lookups.
        """
        try:
            value = self.cache[key]
        except KeyError:
            # NOTE: unpickling can execute arbitrary code; only use
            # shelves whose contents come from a trusted source.
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it at *key*, replacing any old data."""
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete the entry at *key* (KeyError if no such key)."""
        del self.dict[key]
        # The entry may or may not have been cached; drop it if it was.
        self.cache.pop(key, None)

    def close(self):
        """Write back cached entries and close the underlying mapping.

        The underlying mapping is closed (when it has a close() method)
        and the shelf is marked as closed even if sync() raises, so a
        failed write-back cannot leak the open database.  The exception,
        if any, still propagates to the caller.
        """
        try:
            try:
                self.sync()
            finally:
                try:
                    self.dict.close()
                except AttributeError:
                    pass
        finally:
            # Catch errors that may happen when close is called from
            # __del__ because CPython is in interpreter shutdown.
            try:
                self.dict = _ClosedDict()
            except (NameError, TypeError):
                self.dict = None

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the underlying mapping.

        With writeback enabled, every cached entry is stored again and
        the cache is emptied.  If storing an entry fails, the writeback
        flag is restored and the cache is kept, so no accessed entry is
        silently dropped; the exception propagates.  Afterwards the
        underlying mapping's own sync() is called when it has one.
        """
        if self.writeback and self.cache:
            # Switch writeback off while storing so that __setitem__ does
            # not put the entries straight back into the cache.
            self.writeback = False
            try:
                # items() rather than iteritems(): iterate over a snapshot.
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
170
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000171
class BsdDbShelf(Shelf):
    """Shelf on top of a database object offering the "BSD" db interface.

    In addition to the ordinary shelf operations this class provides
    first(), next(), previous(), last() and set_location(), which have no
    counterpart in [g]dbm databases.

    The caller opens the database with one of the "bsddb" module's "open"
    routines (i.e. bsddb.hashopen, bsddb.btopen or bsddb.rnopen) and
    passes the resulting object to the constructor.

    Each of the extra methods asks the database for a (key, pickled value)
    pair and returns (key, unpickled value).  They read from the database
    directly rather than through the write-back cache.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        Shelf.__init__(self, dict, protocol, writeback)

    def set_location(self, key):
        """Return the database's set_location(key) pair, value unpickled."""
        found_key, pickled = self.dict.set_location(key)
        return (found_key, Unpickler(StringIO(pickled)).load())

    def next(self):
        """Return the database's next() pair, value unpickled."""
        found_key, pickled = self.dict.next()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def previous(self):
        """Return the database's previous() pair, value unpickled."""
        found_key, pickled = self.dict.previous()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def first(self):
        """Return the database's first() pair, value unpickled."""
        found_key, pickled = self.dict.first()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def last(self):
        """Return the database's last() pair, value unpickled."""
        found_key, pickled = self.dict.last()
        return (found_key, Unpickler(StringIO(pickled)).load())
Guido van Rossumabad1cc1995-08-11 14:19:16 +0000212
213
class DbfilenameShelf(Shelf):
    """Shelf stored in a database opened via the generic "anydbm" interface.

    The constructor takes the filename of the dbm database instead of an
    already opened mapping.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # Imported here, as in the original, so that anydbm is only loaded
        # when a filename-based shelf is actually created.
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000224
225
def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename of the underlying database; as a
    side-effect an extension may be added to it and more than one file
    may be created.  flag is interpreted exactly like the flag parameter
    of anydbm.open().  protocol selects the version of the pickle
    protocol (0, 1, or 2).  writeback is handed on to the shelf
    unchanged.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf