blob: 5e680bc3c68b6a0d624a91c8f1559773bf289c43 [file] [log] [blame]
Guido van Rossumcc6764c1995-02-09 17:18:10 +00001"""Manage shelves of pickled objects.
2
3A "shelf" is a persistent, dictionary-like object. The difference
4with dbm databases is that the values (not the keys!) in a shelf can
5be essentially arbitrary Python objects -- anything that the "pickle"
6module can handle. This includes most class instances, recursive data
7types, and objects containing lots of shared sub-objects. The keys
8are ordinary strings.
9
10To summarize the interface (key is a string, data is an arbitrary
11object):
12
Fred Drake13a2c272000-02-10 17:17:14 +000013 import shelve
14 d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
Guido van Rossumcc6764c1995-02-09 17:18:10 +000015
Fred Drake13a2c272000-02-10 17:17:14 +000016 d[key] = data # store data at key (overwrites old data if
17 # using an existing key)
Tim Peters0eadaac2003-04-24 16:02:54 +000018 data = d[key] # retrieve a COPY of the data at key (raise
Martin v. Löwis153c9e42003-04-19 20:59:03 +000019 # KeyError if no such key) -- NOTE that this
20 # access returns a *copy* of the entry!
Fred Drake13a2c272000-02-10 17:17:14 +000021 del d[key] # delete data stored at key (raises KeyError
22 # if no such key)
Martin v. Löwise4913c92002-10-18 08:58:14 +000023 flag = d.has_key(key) # true if the key exists; same as "key in d"
Fred Drake13a2c272000-02-10 17:17:14 +000024 list = d.keys() # a list of all existing keys (slow!)
Guido van Rossumcc6764c1995-02-09 17:18:10 +000025
Fred Drake13a2c272000-02-10 17:17:14 +000026 d.close() # close it
Guido van Rossumcc6764c1995-02-09 17:18:10 +000027
28Dependent on the implementation, closing a persistent dictionary may
29or may not be necessary to flush changes to disk.
Martin v. Löwis153c9e42003-04-19 20:59:03 +000030
31Normally, d[key] returns a COPY of the entry. This needs care when
32mutable entries are mutated: for example, if d[key] is a list,
33 d[key].append(anitem)
34does NOT modify the entry d[key] itself, as stored in the persistent
35mapping -- it only modifies the copy, which is then immediately
36discarded, so that the append has NO effect whatsoever. To append an
37item to d[key] in a way that will affect the persistent mapping, use:
38 data = d[key]
39 data.append(anitem)
40 d[key] = data
41
42To avoid the problem with mutable entries, you may pass the keyword
43argument writeback=True in the call to shelve.open. When you use:
44 d = shelve.open(filename, writeback=True)
45then d keeps a cache of all entries you access, and writes them all back
46to the persistent mapping when you call d.close(). This ensures that
47such usage as d[key].append(anitem) works as intended.
48
However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
51access many of d's entries after opening it in this way: d has no way to
52check which of the entries you access are mutable and/or which ones you
53actually mutate, so it must cache, and write back at close, all of the
54entries that you access. You can call d.sync() to write back all the
55entries in the cache, and empty the cache (d.sync() also synchronizes
56the persistent dictionary on disk, if feasible).
Guido van Rossumcc6764c1995-02-09 17:18:10 +000057"""
Guido van Rossuma48061a1995-01-10 00:31:14 +000058
Guido van Rossum914c9381997-06-06 21:12:45 +000059# Try using cPickle and cStringIO if available.
60
61try:
Tim Peters495ad3c2001-01-15 01:36:40 +000062 from cPickle import Pickler, Unpickler
Guido van Rossum914c9381997-06-06 21:12:45 +000063except ImportError:
Tim Peters495ad3c2001-01-15 01:36:40 +000064 from pickle import Pickler, Unpickler
Guido van Rossum914c9381997-06-06 21:12:45 +000065
66try:
Tim Peters495ad3c2001-01-15 01:36:40 +000067 from cStringIO import StringIO
Guido van Rossum914c9381997-06-06 21:12:45 +000068except ImportError:
Tim Peters495ad3c2001-01-15 01:36:40 +000069 from StringIO import StringIO
Guido van Rossuma48061a1995-01-10 00:31:14 +000070
Raymond Hettinger79947162002-11-15 06:46:14 +000071import UserDict
Martin v. Löwis153c9e42003-04-19 20:59:03 +000072import warnings
Raymond Hettinger79947162002-11-15 06:46:14 +000073
Skip Montanaro0de65802001-02-15 22:15:14 +000074__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
Guido van Rossumcc6764c1995-02-09 17:18:10 +000075
class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before they are stored in the wrapped mapping
    and unpickled when they are read back.  When writeback is true,
    every entry that is read or assigned is also kept in self.cache
    and is written back to the mapping by sync().
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        """Wrap the dictionary-like object dict.

        protocol is the pickle protocol handed to Pickler (0 when
        omitted).  binary is the deprecated spelling: it triggers a
        PendingDeprecationWarning and is converted with int() into a
        protocol number.  Passing both raises ValueError.
        """
        self.dict = dict
        if protocol is not None and binary is not None:
            raise ValueError("can't specify both 'protocol' and 'binary'")
        if binary is not None:
            warnings.warn("The 'binary' argument to Shelf() is deprecated",
                          PendingDeprecationWarning)
            protocol = int(binary)
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        # NOTE: __del__ uses the presence of 'writeback' to tell whether
        # __init__ ran to completion, so keep these assignments last.
        self.writeback = writeback
        self.cache = {}

    def keys(self):
        """Return the keys of the underlying mapping."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying mapping."""
        return len(self.dict)

    def has_key(self, key):
        """Return true if key is present in the underlying mapping."""
        return self.dict.has_key(key)

    def __contains__(self, key):
        """Support "key in shelf"; same answer as has_key()."""
        return self.dict.has_key(key)

    def get(self, key, default=None):
        """Return self[key] if key is present, otherwise default."""
        if self.dict.has_key(key):
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the object stored at key.

        The cache is consulted first.  On a miss the pickled string is
        fetched from the underlying mapping (whose lookup error, if any,
        propagates) and unpickled; with writeback enabled the result is
        remembered in the cache.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle value and store it at key in the underlying mapping.

        With writeback enabled the live object is also kept in the cache.
        """
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete key from the underlying mapping and from the cache."""
        del self.dict[key]
        try:
            del self.cache[key]
        except KeyError:
            # The entry was never cached; nothing more to remove.
            pass

    def close(self):
        """Flush with sync(), close the underlying mapping, drop it.

        An AttributeError from the close() call is ignored, so mappings
        without a close() method are accepted.  Afterwards self.dict is
        replaced by 0.
        """
        self.sync()
        try:
            self.dict.close()
        except AttributeError:
            pass
        self.dict = 0

    def __del__(self):
        """Close the shelf when the object is finalized."""
        if not hasattr(self, 'writeback'):
            # __init__ did not run to completion (it raised, or a subclass
            # failed before calling it), so there is nothing to flush or
            # close; calling close() here would only raise AttributeError.
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the underlying mapping.

        With writeback enabled and a non-empty cache, every cached entry
        is re-stored through __setitem__ and the cache is then emptied.
        Finally the underlying mapping's own sync() is called if it has
        one.
        """
        if self.writeback and self.cache:
            # Temporarily disable writeback so that __setitem__ does not
            # touch the cache while it is being iterated.
            self.writeback = False
            try:
                for key, entry in self.cache.iteritems():
                    self[key] = entry
            finally:
                # Restore the flag even if pickling or the store failed;
                # otherwise the shelf would silently stop caching.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
159
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000160
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    On top of the Shelf interface this provides first(), next(),
    previous(), last() and set_location(), which have no counterpart
    in [g]dbm databases.  Each of them forwards to the method of the
    same name on the wrapped database and unpickles the value it gets
    back.

    The database itself must already have been opened with one of the
    "bsddb" module's "open" routines (i.e. bsddb.hashopen, bsddb.btopen
    or bsddb.rnopen) before it is handed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        """Initialize exactly like Shelf, with the same arguments."""
        Shelf.__init__(self, dict, protocol, writeback, binary)

    def set_location(self, key):
        """Call the database's set_location(key); return (key, object)."""
        found_key, pickled = self.dict.set_location(key)
        return (found_key, Unpickler(StringIO(pickled)).load())

    def next(self):
        """Call the database's next(); return (key, object)."""
        found_key, pickled = self.dict.next()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def previous(self):
        """Call the database's previous(); return (key, object)."""
        found_key, pickled = self.dict.previous()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def first(self):
        """Call the database's first(); return (key, object)."""
        found_key, pickled = self.dict.first()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def last(self):
        """Call the database's last(); return (key, object)."""
        found_key, pickled = self.dict.last()
        return (found_key, Unpickler(StringIO(pickled)).load())
Guido van Rossumabad1cc1995-08-11 14:19:16 +0000201
202
class DbfilenameShelf(Shelf):
    """Shelf implementation using the "anydbm" generic dbm interface.

    The constructor takes the filename of the dbm database, opens it
    with anydbm.open() and wraps the result.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
        """Open filename via anydbm with flag, then initialize the Shelf."""
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback, binary)
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000213
214
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename of the underlying database; as a
    side-effect an extension may be appended to it and more than one
    file may be created.  The optional flag parameter is interpreted
    the same way as the flag parameter of anydbm.open().  The optional
    protocol parameter selects the pickle protocol version (0, 1, or 2).

    The optional binary parameter is deprecated; it may be set to True
    to force binary pickles to be used for serializing data values.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback, binary)
    return shelf